From airlied at kemper.freedesktop.org Wed Apr 1 04:10:50 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 31 Mar 2015 21:10:50 -0700 (PDT) Subject: Mesa (master): egl: add initial EGL_MESA_image_dma_buf_export v2.4 Message-ID: <20150401041050.AA27576336@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8f7338f284cdb1fef64c85e3293d2200d0cc6387 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8f7338f284cdb1fef64c85e3293d2200d0cc6387 Author: Dave Airlie Date: Mon Mar 3 13:57:16 2014 +1000 egl: add initial EGL_MESA_image_dma_buf_export v2.4 At the moment to get an EGL image to a dma-buf file descriptor, you have to use EGL_MESA_drm_image, and then use libdrm to convert this to a file descriptor. This extension just provides an API modelled on EGL_MESA_drm_image, to return a dma-buf file descriptor. v2: update spec for new API proposal add internal queries to get the fourcc back from intel driver. v2.1: add gallium pieces. v2.2: add offsets to spec and API, rename fd->fds, stride->strides in API. rewrite spec a bit more, add some q/a v2.3: add modifiers to query interface and 64-bit type for that (Daniel Stone) specify what happens to num fds vs num planes differences. 
(Chad Versace) v2.4: fix grammar (Daniel Stone) Signed-off-by: Dave Airlie --- docs/specs/MESA_image_dma_buf_export.txt | 142 ++++++++++++++++++++++++++++++ include/EGL/eglmesaext.h | 8 ++ include/GL/internal/dri_interface.h | 4 +- src/egl/drivers/dri2/egl_dri2.c | 59 ++++++++++++- src/egl/main/eglapi.c | 50 +++++++++++ src/egl/main/eglapi.h | 10 +++ src/egl/main/egldisplay.h | 2 + src/egl/main/eglfallbacks.c | 5 ++ src/gallium/state_trackers/dri/dri2.c | 32 ++++++- src/mesa/drivers/dri/i965/intel_screen.c | 25 +++++- 10 files changed, 332 insertions(+), 5 deletions(-) diff --git a/docs/specs/MESA_image_dma_buf_export.txt b/docs/specs/MESA_image_dma_buf_export.txt new file mode 100644 index 0000000..c3794ee --- /dev/null +++ b/docs/specs/MESA_image_dma_buf_export.txt @@ -0,0 +1,142 @@ +Name + + MESA_image_dma_buf_export + +Name Strings + + EGL_MESA_image_dma_buf_export + +Contributors + + Dave Airlie + +Contact + + Dave Airlie (airlied 'at' redhat 'dot' com) + +Status + + Proposal + +Version + + Version 2, Mar 30, 2015 + +Number + + EGL Extension #not assigned + +Dependencies + + Requires EGL 1.4 or later. This extension is written against the + wording of the EGL 1.4 specification. + + EGL_KHR_image_base is required. + + The EGL implementation must be running on a Linux kernel supporting the + dma_buf buffer sharing mechanism. + +Overview + + This extension provides entry points for integrating EGLImage with the + dma-buf infrastructure. The extension allows creating a Linux dma_buf + file descriptor or multiple file descriptors, in the case of multi-plane + YUV image, from an EGLImage. + + It is designed to provide the complementary functionality to EGL_EXT_image_dma_buf_import. + +IP Status + + Open-source; freely implementable. + +New Types + + This is a 64 bit unsigned integer. 
+ + typedef khronos_uint64_t EGLuint64MESA; + + +New Procedures and Functions + + EGLBoolean eglExportDMABUFImageQueryMESA(EGLDisplay dpy, + EGLImageKHR image, + int *fourcc, + int *num_planes, + EGLuint64MESA *modifiers); + + EGLBoolean eglExportDMABUFImageMESA(EGLDisplay dpy, + EGLImageKHR image, + int *fds, + EGLint *strides, + EGLint *offsets); + +New Tokens + + None + + +Additions to the EGL 1.4 Specification: + + To mirror the import extension, this extension attempts to return + enough information to enable an exported dma-buf to be imported + via eglCreateImageKHR and EGL_LINUX_DMA_BUF_EXT token. + + Retrieving the information is a two step process, so two APIs + are required. + + The first entrypoint + EGLBoolean eglExportDMABUFImageQueryMESA(EGLDisplay dpy, + EGLImageKHR image, + int *fourcc, + int *num_planes, + EGLuint64MESA *modifiers); + + is used to retrieve the pixel format of the buffer, as specified by + drm_fourcc.h, the number of planes in the image and the Linux + drm modifiers. <fourcc>, <num_planes> and <modifiers> may be NULL, + in which case no value is retrieved. + + The second entrypoint retrieves the dma_buf file descriptors, + strides and offsets for the image. The caller should pass + arrays sized according to the num_planes values retrieved previously. + Passing arrays of the wrong size will have undefined results. + If the number of fds is less than the number of planes, then + subsequent fd slots should contain -1. + + EGLBoolean eglExportDMABUFImageMESA(EGLDisplay dpy, + EGLImageKHR image, + int *fds, + EGLint *strides, + EGLint *offsets); + + <fds>, <strides>, <offsets> can be NULL if the information isn't + required by the caller. + +Issues + +1. Should the API look more like an attribute getting API? + +ANSWER: No, from a user interface pov, having to iterate across calling +the API up to 12 times using attribs seems like the wrong solution. + +2. Should the API take a plane and just get the fd/stride/offset for that + plane? + +ANSWER: UNKNOWN, this might be just as valid an API. 
+ +3. Does ownership of the file descriptor remain with the app? + +ANSWER: Yes, the app is responsible for closing any fds retrieved. + +4. If number of planes and number of fds differ what should we do? + +ANSWER: Return -1 for the secondary slots, as this avoids having +to dup the fd extra times to make the interface sane. + +Revision History + + Version 2, March, 2015 + Add a query interface (Dave Airlie) + Version 1, June 3, 2014 + Initial draft (Dave Airlie) + diff --git a/include/EGL/eglmesaext.h b/include/EGL/eglmesaext.h index 5fcc527..595babd 100644 --- a/include/EGL/eglmesaext.h +++ b/include/EGL/eglmesaext.h @@ -170,6 +170,14 @@ typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOK) (EGLDisplay dpy, EG #define EGL_NO_CONFIG_MESA ((EGLConfig)0) #endif +#if KHRONOS_SUPPORT_INT64 +#ifndef EGL_MESA_image_dma_buf_export +#define EGL_MESA_image_dma_buf_export 1 +typedef khronos_uint64_t EGLuint64MESA; +EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *fourcc, EGLint *nplanes, EGLuint64MESA *modifiers); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets); +#endif +#endif #ifdef __cplusplus } #endif diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index 1d670b1..eb7da23 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -1006,7 +1006,7 @@ struct __DRIdri2ExtensionRec { * extensions. */ #define __DRI_IMAGE "DRI_IMAGE" -#define __DRI_IMAGE_VERSION 10 +#define __DRI_IMAGE_VERSION 11 /** * These formats correspond to the similarly named MESA_FORMAT_* @@ -1097,6 +1097,8 @@ struct __DRIdri2ExtensionRec { #define __DRI_IMAGE_ATTRIB_FD 0x2007 /* available in versions * 7+. Each query will return a * new fd. 
*/ +#define __DRI_IMAGE_ATTRIB_FOURCC 0x2008 /* available in versions 11 */ +#define __DRI_IMAGE_ATTRIB_NUM_PLANES 0x2009 /* available in versions 11 */ enum __DRIYUVColorSpace { __DRI_YUV_COLOR_SPACE_UNDEFINED = 0, diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index d503196..a428f28 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -525,8 +525,14 @@ dri2_setup_screen(_EGLDisplay *disp) capabilities = dri2_dpy->image->getCapabilities(dri2_dpy->dri_screen); disp->Extensions.MESA_drm_image = (capabilities & __DRI_IMAGE_CAP_GLOBAL_NAMES) != 0; - } else + + if (dri2_dpy->image->base.version >= 11) + disp->Extensions.MESA_image_dma_buf_export = EGL_TRUE; + } else { disp->Extensions.MESA_drm_image = EGL_TRUE; + if (dri2_dpy->image->base.version >= 11) + disp->Extensions.MESA_image_dma_buf_export = EGL_TRUE; + } disp->Extensions.KHR_image_base = EGL_TRUE; disp->Extensions.KHR_gl_renderbuffer_image = EGL_TRUE; @@ -1965,6 +1971,55 @@ dri2_export_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, return EGL_TRUE; } + +static EGLBoolean +dri2_export_dma_buf_image_query_mesa(_EGLDriver *drv, _EGLDisplay *disp, + _EGLImage *img, + EGLint *fourcc, EGLint *nplanes, + EGLuint64MESA *modifiers) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_image *dri2_img = dri2_egl_image(img); + + (void) drv; + + + if (nplanes) + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_NUM_PLANES, nplanes); + if (fourcc) + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_FOURCC, fourcc); + + if (modifiers) + *modifiers = 0; + + return EGL_TRUE; +} + +static EGLBoolean +dri2_export_dma_buf_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, + int *fds, EGLint *strides, EGLint *offsets) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp); + struct dri2_egl_image *dri2_img = dri2_egl_image(img); + + (void) drv; + + /* rework 
later to provide multiple fds/strides/offsets */ + if (fds) + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_FD, fds); + + if (strides) + dri2_dpy->image->queryImage(dri2_img->dri_image, + __DRI_IMAGE_ATTRIB_STRIDE, strides); + + if (offsets) + offsets[0] = 0; + + return EGL_TRUE; +} #endif #ifdef HAVE_WAYLAND_PLATFORM @@ -2219,6 +2274,8 @@ _eglBuiltInDriverDRI2(const char *args) #ifdef HAVE_LIBDRM dri2_drv->base.API.CreateDRMImageMESA = dri2_create_drm_image_mesa; dri2_drv->base.API.ExportDRMImageMESA = dri2_export_drm_image_mesa; + dri2_drv->base.API.ExportDMABUFImageQueryMESA = dri2_export_dma_buf_image_query_mesa; + dri2_drv->base.API.ExportDMABUFImageMESA = dri2_export_dma_buf_image_mesa; #endif #ifdef HAVE_WAYLAND_PLATFORM dri2_drv->base.API.BindWaylandDisplayWL = dri2_bind_wayland_display_wl; diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 6031a7a..ea2ee73 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -423,6 +423,8 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(EXT_image_dma_buf_import); _EGL_CHECK_EXTENSION(NV_post_sub_buffer); + + _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export); #undef _EGL_CHECK_EXTENSION } @@ -1239,6 +1241,10 @@ eglGetProcAddress(const char *procname) { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT }, { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT }, { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, +#ifdef EGL_MESA_drm_buf_image_export + { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, + { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, +#endif { NULL, NULL } }; EGLint i; @@ -1926,3 +1932,47 @@ eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface surface, RETURN_EGL_EVAL(disp, ret); } + +#ifdef EGL_MESA_image_dma_buf_export +EGLBoolean EGLAPIENTRY +eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImageKHR image, + 
EGLint *fourcc, EGLint *nplanes, + EGLuint64MESA *modifiers) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLImage *img = _eglLookupImage(image, disp); + _EGLDriver *drv; + EGLBoolean ret; + + _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); + assert(disp->Extensions.MESA_image_dma_buf_export); + + if (!img) + RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); + + ret = drv->API.ExportDMABUFImageQueryMESA(drv, disp, img, fourcc, nplanes, + modifiers); + + RETURN_EGL_EVAL(disp, ret); +} + +EGLBoolean EGLAPIENTRY +eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImageKHR image, + int *fds, EGLint *strides, EGLint *offsets) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLImage *img = _eglLookupImage(image, disp); + _EGLDriver *drv; + EGLBoolean ret; + + _EGL_CHECK_DISPLAY(disp, EGL_FALSE, drv); + assert(disp->Extensions.MESA_image_dma_buf_export); + + if (!img) + RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); + + ret = drv->API.ExportDMABUFImageMESA(drv, disp, img, fds, strides, offsets); + + RETURN_EGL_EVAL(disp, ret); +} +#endif diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 0626719..eb5f58e 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -140,6 +140,11 @@ typedef EGLBoolean (*SwapBuffersWithDamageEXT_t) (_EGLDriver *drv, _EGLDisplay * typedef EGLBoolean (*GetSyncValuesCHROMIUM_t) (_EGLDisplay *dpy, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc); +#ifdef EGL_MESA_image_dma_buf_export +typedef EGLBoolean (*ExportDMABUFImageQueryMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fourcc, EGLint *stride, EGLuint64MESA *modifiers); +typedef EGLBoolean (*ExportDMABUFImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fds, EGLint *strides, EGLint *offsets); +#endif + /** * The API dispatcher jumps through these functions */ @@ -226,6 +231,11 @@ struct _egl_api QueryBufferAge_t QueryBufferAge; GetSyncValuesCHROMIUM_t GetSyncValuesCHROMIUM; + +#ifdef 
EGL_MESA_image_dma_buf_export + ExportDMABUFImageQueryMESA_t ExportDMABUFImageQueryMESA; + ExportDMABUFImageMESA_t ExportDMABUFImageMESA; +#endif }; #endif /* EGLAPI_INCLUDED */ diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index d7f5dba..4a1fb4a 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -123,6 +123,8 @@ struct _egl_extensions EGLBoolean EXT_buffer_age; EGLBoolean EXT_swap_buffers_with_damage; EGLBoolean EXT_image_dma_buf_import; + + EGLBoolean MESA_image_dma_buf_export; }; diff --git a/src/egl/main/eglfallbacks.c b/src/egl/main/eglfallbacks.c index be59643..c108ca7 100644 --- a/src/egl/main/eglfallbacks.c +++ b/src/egl/main/eglfallbacks.c @@ -118,4 +118,9 @@ _eglInitDriverFallbacks(_EGLDriver *drv) #ifdef EGL_NOK_swap_region drv->API.SwapBuffersRegionNOK = NULL; #endif + +#ifdef EGL_MESA_dma_buf_image_export + drv->API.ExportDMABUFImageQueryMESA = NULL; + drv->API.ExportDMABUFImageMESA = NULL; +#endif } diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 7d65ba3..f8f4ecf 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -76,6 +76,30 @@ static int convert_fourcc(int format, int *dri_components_p) return format; } +static int convert_to_fourcc(int format) +{ + switch(format) { + case __DRI_IMAGE_FORMAT_RGB565: + format = __DRI_IMAGE_FOURCC_RGB565; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + format = __DRI_IMAGE_FOURCC_ARGB8888; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + format = __DRI_IMAGE_FOURCC_XRGB8888; + break; + case __DRI_IMAGE_FORMAT_ABGR8888: + format = __DRI_IMAGE_FOURCC_ABGR8888; + break; + case __DRI_IMAGE_FORMAT_XBGR8888: + format = __DRI_IMAGE_FOURCC_XBGR8888; + break; + default: + return -1; + } + return format; +} + /** * DRI2 flush extension. 
*/ @@ -909,6 +933,12 @@ dri2_query_image(__DRIimage *image, int attrib, int *value) return GL_FALSE; *value = image->dri_components; return GL_TRUE; + case __DRI_IMAGE_ATTRIB_FOURCC: + *value = convert_to_fourcc(image->dri_format); + return GL_TRUE; + case __DRI_IMAGE_ATTRIB_NUM_PLANES: + *value = 1; + return GL_TRUE; default: return GL_FALSE; } @@ -1203,7 +1233,7 @@ dri2_get_capabilities(__DRIscreen *_screen) /* The extension is modified during runtime if DRI_PRIME is detected */ static __DRIimageExtension dri2ImageExtension = { - .base = { __DRI_IMAGE, 10 }, + .base = { __DRI_IMAGE, 11 }, .createImageFromName = dri2_create_image_from_name, .createImageFromRenderbuffer = dri2_create_image_from_renderbuffer, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 3640b67..cb9710f 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -300,6 +300,17 @@ intel_image_format_lookup(int fourcc) return f; } +static boolean intel_lookup_fourcc(int dri_format, int *fourcc) +{ + for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + if (intel_image_formats[i].planes[0].dri_format == dri_format) { + *fourcc = intel_image_formats[i].fourcc; + return true; + } + } + return false; +} + static __DRIimage * intel_allocate_image(int dri_format, void *loaderPrivate) { @@ -559,6 +570,14 @@ intel_query_image(__DRIimage *image, int attrib, int *value) if (drm_intel_bo_gem_export_to_prime(image->bo, value) == 0) return true; return false; + case __DRI_IMAGE_ATTRIB_FOURCC: + if (intel_lookup_fourcc(image->dri_format, value)) + return true; + return false; + case __DRI_IMAGE_ATTRIB_NUM_PLANES: + *value = 1; + return true; + default: return false; } @@ -784,7 +803,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) } static const __DRIimageExtension intelImageExtension = { - .base = { __DRI_IMAGE, 8 }, + .base = { __DRI_IMAGE, 11 }, .createImageFromName = 
intel_create_image_from_name, .createImageFromRenderbuffer = intel_create_image_from_renderbuffer, @@ -797,7 +816,9 @@ static const __DRIimageExtension intelImageExtension = { .fromPlanar = intel_from_planar, .createImageFromTexture = intel_create_image_from_texture, .createImageFromFds = intel_create_image_from_fds, - .createImageFromDmaBufs = intel_create_image_from_dma_bufs + .createImageFromDmaBufs = intel_create_image_from_dma_bufs, + .blitImage = NULL, + .getCapabilities = NULL }; static int From mperes at kemper.freedesktop.org Wed Apr 1 06:37:25 2015 From: mperes at kemper.freedesktop.org (Martin Peres) Date: Tue, 31 Mar 2015 23:37:25 -0700 (PDT) Subject: Mesa (master): mesa/fbo: lock ctx->Shared-> Mutex when allocating renderbuffers Message-ID: <20150401063725.E3E7B7635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 59af7ed28cd1b44b525a7d6a324c4e00092104b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=59af7ed28cd1b44b525a7d6a324c4e00092104b6 Author: Martin Peres Date: Mon Mar 30 10:34:20 2015 +0300 mesa/fbo: lock ctx->Shared->Mutex when allocating renderbuffers This mutex is used to make sure the shared context does not change while some shared code is looking into it. Calling BindRenderbufferEXT BindRenderbuffer with a gles context would not take the mutex before allocating an entry. Commit a34669b then moved out the allocation out of bind_renderbuffer into allocate_renderbuffer before using it for the CreateRenderBuffer entry point. This thus also made this entry point unsafe. The issue has been hinted by Ilia Mirkin. 
Reviewed-by: Brian Paul Signed-off-by: Martin Peres --- src/mesa/main/fbobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 3808b56..8032585 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1218,8 +1218,10 @@ allocate_renderbuffer(struct gl_context *ctx, GLuint renderbuffer, return NULL; } assert(newRb->AllocStorage); + mtx_lock(&ctx->Shared->Mutex); _mesa_HashInsert(ctx->Shared->RenderBuffers, renderbuffer, newRb); newRb->RefCount = 1; /* referenced by hash table */ + mtx_unlock(&ctx->Shared->Mutex); return newRb; } From mperes at kemper.freedesktop.org Wed Apr 1 06:37:25 2015 From: mperes at kemper.freedesktop.org (Martin Peres) Date: Tue, 31 Mar 2015 23:37:25 -0700 (PDT) Subject: Mesa (master): mesa/fbo: do not assign a value that is never read later on Message-ID: <20150401063725.D8BD676336@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fa3832155100850da8346faa64c3cb30c4e86e39 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa3832155100850da8346faa64c3cb30c4e86e39 Author: Martin Peres Date: Wed Mar 25 16:28:03 2015 +0200 mesa/fbo: do not assign a value that is never read later on The issue has been detected by coverty. v2: - move the declaration of obj to the else clause (Brian Paul) v3: Review by Brian Paul - get rid of the obj declaration in favor of a direct reference Reviewed-by: Brian Paul Signed-off-by: Martin Peres --- src/mesa/main/fbobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 072e1a8..3808b56 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1400,7 +1400,6 @@ create_render_buffers(struct gl_context *ctx, GLsizei n, GLuint *renderbuffers, bool dsa) { const char *func = dsa ? 
"glCreateRenderbuffers" : "glGenRenderbuffers"; - struct gl_renderbuffer *obj; GLuint first; GLint i; @@ -1419,13 +1418,11 @@ create_render_buffers(struct gl_context *ctx, GLsizei n, GLuint *renderbuffers, renderbuffers[i] = name; if (dsa) { - obj = allocate_renderbuffer(ctx, name, func); + allocate_renderbuffer(ctx, name, func); } else { - obj = &DummyRenderbuffer; - - /* insert the object into the hash table */ + /* insert a dummy renderbuffer into the hash table */ mtx_lock(&ctx->Shared->Mutex); - _mesa_HashInsert(ctx->Shared->RenderBuffers, name, obj); + _mesa_HashInsert(ctx->Shared->RenderBuffers, name, &DummyRenderbuffer); mtx_unlock(&ctx->Shared->Mutex); } } From sroland at kemper.freedesktop.org Wed Apr 1 11:28:46 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Wed, 1 Apr 2015 04:28:46 -0700 (PDT) Subject: Mesa (master): gallivm: (trivial) fix the logic deciding if function call should be used... Message-ID: <20150401112846.4644B76336@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e3252defd2eabb6bcc8eb14f16d534e964fd8884 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e3252defd2eabb6bcc8eb14f16d534e964fd8884 Author: Roland Scheidegger Date: Wed Apr 1 13:25:56 2015 +0200 gallivm: (trivial) fix the logic deciding if function call should be used... Copy and paste bug with the img filter decision. Since there's only 2 different filters anyway just drop this bit. 
--- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 378c562..1a60ca9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -3330,9 +3330,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, op_type != LP_SAMPLER_OP_TEXTURE || ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE || static_texture_state->level_zero_only == TRUE) && - static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter && - (static_sampler_state->min_img_filter == PIPE_TEX_FILTER_NEAREST || - static_sampler_state->min_img_filter == PIPE_TEX_FILTER_NEAREST)); + static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter); use_tex_func = format_desc && !(simple_format && simple_tex); } From itoral at kemper.freedesktop.org Wed Apr 1 13:37:28 2015 From: itoral at kemper.freedesktop.org (Iago Toral Quiroga) Date: Wed, 1 Apr 2015 06:37:28 -0700 (PDT) Subject: Mesa (master): i965: Handle scratch accesses where reladdr also points to scratch space Message-ID: <20150401133728.B719776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3818dfcf3c2d03809774bba613d7dd92752b36db URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3818dfcf3c2d03809774bba613d7dd92752b36db Author: Iago Toral Quiroga Date: Tue Mar 17 10:48:04 2015 +0100 i965: Handle scratch accesses where reladdr also points to scratch space This is a problem when we have IR like this: (array_ref (var_ref temps) (swiz x (expression ivec4 bitcast_f2i (swiz xxxx (array_ref (var_ref temps) (constant int (2)) ) )) )) ) ) where we are indexing an array with the result of an expression that accesses the same array. 
In this scenario, temps will be moved to scratch space and we will need to add scratch reads/writes for all accesses to temps; however, the current implementation does not consider the case where a reladdr pointer (obtained by indexing into temps through an expression) points to a register that is also stored in scratch space (as in this case, where the expression used to index temps accesses temps[2]), and thus, requires a scratch read before it is accessed. v2 (Francisco Jerez): - Handle also recursive reladdr addressing. - Do not memcpy dst_reg into src_reg when rewriting reladdr. v3 (Francisco Jerez): - Reduce complexity by moving recursive reladdr scratch access handling to a separate recursive function. - Do not skip demoting reladdr index registers to scratch space if the top level GRF has already been visited. v4 (Francisco Jerez) - Remove redundant checks. - Simplify code by making emit_resolve_reladdr return a register with the original src data except for reg, reg_offset and reladdr. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89508 Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_vec4.h | 2 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 100 ++++++++++++++++++------ 2 files changed, 76 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 33297ae..6ec00d5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -367,6 +367,8 @@ public: dst_reg dst, src_reg orig_src, int base_offset); + src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, + vec4_instruction *inst, src_reg src); bool try_emit_mad(ir_expression *ir); bool try_emit_b2f_of_compare(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 26a3b9f..ca1a995 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3352,6 +3352,39 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, } /** + * Checks if \p src and/or \p src.reladdr require a scratch read, and if so, + * adds the scratch read(s) before \p inst. The function also checks for + * recursive reladdr scratch accesses, issuing the corresponding scratch + * loads and rewriting reladdr references accordingly. + * + * \return \p src if it did not require a scratch load, otherwise, the + * register holding the result of the scratch load that the caller should + * use to rewrite src. 
+ */ +src_reg +vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block, + vec4_instruction *inst, src_reg src) +{ + /* Resolve recursive reladdr scratch access by calling ourselves + * with src.reladdr + */ + if (src.reladdr) + *src.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, + *src.reladdr); + + /* Now handle scratch access on src */ + if (src.file == GRF && scratch_loc[src.reg] != -1) { + dst_reg temp = dst_reg(this, glsl_type::vec4_type); + emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]); + src.reg = temp.reg; + src.reg_offset = temp.reg_offset; + src.reladdr = NULL; + } + + return src; +} + +/** * We can't generally support array access in GRF space, because a * single instruction's destination can only span 2 contiguous * registers. So, we send all GRF arrays that get variable index @@ -3368,20 +3401,31 @@ vec4_visitor::move_grf_array_access_to_scratch() * scratch. */ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { - if (inst->dst.file == GRF && inst->dst.reladdr && - scratch_loc[inst->dst.reg] == -1) { - scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[inst->dst.reg]; + if (inst->dst.file == GRF && inst->dst.reladdr) { + if (scratch_loc[inst->dst.reg] == -1) { + scratch_loc[inst->dst.reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[inst->dst.reg]; + } + + for (src_reg *iter = inst->dst.reladdr; + iter->reladdr; + iter = iter->reladdr) { + if (iter->file == GRF && scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + } } for (int i = 0 ; i < 3; i++) { - src_reg *src = &inst->src[i]; - - if (src->file == GRF && src->reladdr && - scratch_loc[src->reg] == -1) { - scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->alloc.sizes[src->reg]; - } + for (src_reg *iter = &inst->src[i]; + iter->reladdr; + iter = iter->reladdr) { + if (iter->file == GRF && 
scratch_loc[iter->reg] == -1) { + scratch_loc[iter->reg] = c->last_scratch; + c->last_scratch += this->alloc.sizes[iter->reg]; + } + } } } @@ -3395,23 +3439,27 @@ vec4_visitor::move_grf_array_access_to_scratch() base_ir = inst->ir; current_annotation = inst->annotation; - if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) { - emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); - } - - for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1) - continue; - - dst_reg temp = dst_reg(this, glsl_type::vec4_type); + /* First handle scratch access on the dst. Notice we have to handle + * the case where the dst's reladdr also points to scratch space. + */ + if (inst->dst.reladdr) + *inst->dst.reladdr = emit_resolve_reladdr(scratch_loc, block, inst, + *inst->dst.reladdr); - emit_scratch_read(block, inst, temp, inst->src[i], - scratch_loc[inst->src[i].reg]); + /* Now that we have handled any (possibly recursive) reladdr scratch + * accesses for dst we can safely do the scratch write for dst itself + */ + if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) + emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]); - inst->src[i].file = temp.file; - inst->src[i].reg = temp.reg; - inst->src[i].reg_offset = temp.reg_offset; - inst->src[i].reladdr = NULL; + /* Now handle scratch access on any src. In this case, since inst->src[i] + * already is a src_reg, we can just call emit_resolve_reladdr with + * inst->src[i] and it will take care of handling scratch loads for + * both src and src.reladdr (recursively). 
+ */ + for (int i = 0 ; i < 3; i++) { + inst->src[i] = emit_resolve_reladdr(scratch_loc, block, inst, + inst->src[i]); } } } From evelikov at kemper.freedesktop.org Wed Apr 1 13:44:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 06:44:26 -0700 (PDT) Subject: Mesa (master): docs: note that classic osmesa/ libEGL no longer builds with scons Message-ID: <20150401134426.0C82A76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1d36c52f5db1e7ed651d6cea24fe87e80c7c1565 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1d36c52f5db1e7ed651d6cea24fe87e80c7c1565 Author: Emil Velikov Date: Wed Mar 25 20:12:02 2015 +0000 docs: note that classic osmesa/libEGL no longer builds with scons Plus nuke the final reference to osmesa from README.WIN32. Reviewed-by: Jose Fonseca Signed-off-by: Emil Velikov --- docs/README.WIN32 | 4 ---- docs/relnotes/10.6.0.html | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/README.WIN32 b/docs/README.WIN32 index e0e5b9b..94e1d6f 100644 --- a/docs/README.WIN32 +++ b/docs/README.WIN32 @@ -11,10 +11,6 @@ no longer shipped or supported. Run - scons osmesa - -to build classic osmesa driver; or - scons libgl-gdi to build gallium based GDI driver. diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 00aaaa5..3233637 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -65,6 +65,8 @@ TBD.
  • Removed OpenVG support.
  • Removed the galahad gallium driver.
  • Removed the identity gallium driver.
  • +
  • Removed the EGL loader from the Windows SCons build.
  • +
  • Removed the classic osmesa from the Windows SCons build.
  • From evelikov at kemper.freedesktop.org Wed Apr 1 13:44:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 06:44:26 -0700 (PDT) Subject: Mesa (master): nir: add nir_builder.h to the tarball Message-ID: <20150401134426.39C5176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5604d7675ed40227df9fc6a500eb4f0d2421f4ca URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5604d7675ed40227df9fc6a500eb4f0d2421f4ca Author: Emil Velikov Date: Tue Mar 31 10:54:34 2015 +0100 nir: add nir_builder.h to the tarball The header was added with commit 2a135c470e3(nir: Add an ALU op builder kind of like ir_builder.h) but did not made it into to the sources list. Fortunately it remained unused until a recent commit faf6106c6f6(nir: Implement a Mesa IR -> NIR translator.) v2: Remove the bogus dependency. Tweak commit message. Signed-off-by: Emil Velikov Reviewed-by: Eric Anholt Reviewed-by: Matt Turner --- src/glsl/Makefile.sources | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 8d29c55..c3b63d1 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -22,6 +22,7 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/nir.c \ nir/nir.h \ + nir/nir_builder.h \ nir/nir_constant_expressions.h \ nir/nir_dominance.c \ nir/nir_from_ssa.c \ From evelikov at kemper.freedesktop.org Wed Apr 1 13:44:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 06:44:26 -0700 (PDT) Subject: Mesa (master): osmesa: don't try to bundle osmesa.def SConscript Message-ID: <20150401134426.161437635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c07df0f2014636b601cdbaff63214296599b1ad5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c07df0f2014636b601cdbaff63214296599b1ad5 Author: Emil Velikov Date: Sun Mar 29 00:56:09 2015 +0000 osmesa: don't try to bundle osmesa.def SConscript Both of which were removed with commit 69db422218b(scons: Don't build 
osmesa.) Signed-off-by: Emil Velikov Reviewed-by: Jose Fonseca --- src/mesa/drivers/osmesa/Makefile.am | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/drivers/osmesa/Makefile.am b/src/mesa/drivers/osmesa/Makefile.am index 60048cc..9a388d6 100644 --- a/src/mesa/drivers/osmesa/Makefile.am +++ b/src/mesa/drivers/osmesa/Makefile.am @@ -21,8 +21,6 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. -EXTRA_DIST = osmesa.def SConscript - AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ From evelikov at kemper.freedesktop.org Wed Apr 1 13:44:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 06:44:26 -0700 (PDT) Subject: Mesa (master): xmlpool: don't forget to ship the MOS Message-ID: <20150401134426.2178B7635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a665b9b3c89095923cf2251895afc69c9f79aafe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a665b9b3c89095923cf2251895afc69c9f79aafe Author: Emil Velikov Date: Sun Mar 29 13:46:31 2015 +0100 xmlpool: don't forget to ship the MOS This will allow us to finally remove python from the build time dependencies list. 
Considering that you're building from a release tarball of course :-) Cc: Bernd Kuhls Reported-by: Bernd Kuhls Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Matt Turner --- src/mesa/drivers/dri/common/xmlpool/Makefile.am | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile.am b/src/mesa/drivers/dri/common/xmlpool/Makefile.am index 5557716..9700499 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am +++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am @@ -52,7 +52,14 @@ POT=xmlpool.pot .PHONY: all clean pot po mo -EXTRA_DIST = gen_xmlpool.py options.h t_options.h $(POS) SConscript +EXTRA_DIST = \ + gen_xmlpool.py \ + options.h \ + t_options.h \ + $(POS) \ + $(MOS) \ + SConscript + BUILT_SOURCES = options.h CLEANFILES = $(MOS) options.h From evelikov at kemper.freedesktop.org Wed Apr 1 13:44:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 06:44:26 -0700 (PDT) Subject: Mesa (master): xmlpool: remove the clean target Message-ID: <20150401134426.2DA0376332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4984cb7ef866572c56981dab3e10b0ad62dc9782 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4984cb7ef866572c56981dab3e10b0ad62dc9782 Author: Emil Velikov Date: Sun Mar 29 13:46:32 2015 +0100 xmlpool: remove the clean target ... by folding it into CLEANFILES. Don't worry about $(LANG) as it is essentially the first folder of $(POS). With the latter already handled. 
Signed-off-by: Emil Velikov Reviewed-by: Matt Turner --- src/mesa/drivers/dri/common/xmlpool/Makefile.am | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile.am b/src/mesa/drivers/dri/common/xmlpool/Makefile.am index 9700499..a6f1652 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am +++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am @@ -61,12 +61,10 @@ EXTRA_DIST = \ SConscript BUILT_SOURCES = options.h -CLEANFILES = $(MOS) options.h - -# All generated files are cleaned up. -clean: - -rm -f $(POT) options.h *~ - -rm -rf $(LANGS) +CLEANFILES = \ + options.h + $(POS) \ + $(MOS) # Default target options.h options.h: LOCALEDIR := . From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): vc4: Tell shader-db how big our UBOs are, if present. Message-ID: <20150401180224.BF678760E6@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 486dcfbbd955e01ff1d254cc533c3cc4692ad54b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=486dcfbbd955e01ff1d254cc533c3cc4692ad54b Author: Eric Anholt Date: Tue Mar 31 11:39:45 2015 -0700 vc4: Tell shader-db how big our UBOs are, if present. I had regressed them for a while with the NIR work. 
--- src/gallium/drivers/vc4/vc4_program.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 9e145e5..d8726ca 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2280,6 +2280,12 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, j++; } } + if (shader->ubo_size) { + fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n", + qir_get_stage_name(c->stage), + c->program_id, c->variant_id, + shader->ubo_size / 4); + } qir_compile_destroy(c); From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): gallium: Add tgsi_to_nir to get a nir_shader for a TGSI shader. Message-ID: <20150401180224.C96E77635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 783ad697d25e754ab719ab6c715969c35dbe867b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=783ad697d25e754ab719ab6c715969c35dbe867b Author: Eric Anholt Date: Wed Oct 29 14:32:16 2014 -0700 gallium: Add tgsi_to_nir to get a nir_shader for a TGSI shader. This will be used by the VC4 driver for doing device-independent optimization, and hopefully eventually replacing its whole IR. It also may be useful to other drivers for the same reason. v2: Add all of the instructions I was relying on tgsi_lowering to remove, and more. v3: Rebase on SSA rework of the builder. v4: Use the NIR ineg operation instead of doing a src modifier. v5: Don't use ineg for fnegs. (infer_src_type on MOV doesn't do what I expect, again). v6: Fix handling of multi-channel KILL_IF sources. v7: Make ttn_get_f() return a swizzle of a scalar load_const, rather than a vector load_const. CSE doesn't recognize that srcs out of those channels are actually all the same. 
v8: Rebase on nir_builder auto-sizing, make the scalar arguments to non-ALU instructions actually be scalars. v9: Add support for if/loop instructions, additional texture targets, and untested support for indirect addressing on temps. v10: Rebase on master, drop bad comment about control flow and just choose the X channel, use int comparison opcodes in LIT for now, drop unused pipe_context argument. v11: Fix translation of LRP (previously missed because I mis-translated back out), use nir_builder init helpers. v12: Rebase on master, adding explicit include of mtypes.h to get INTERP_QUALIFIER_* v13: Rebase on variables being in lists instead of hash tables, drop use of mtypes.h in favor of util/pipeline.h. Use Ken's nir_builder swizzle and fmov/imov_alu helpers, drop "struct" in front of nir_builder, use nir_builder directly as the function arg in a lot of cases, drop redundant members of ttn_compile that are also in nir_builder, drop some half-baked malloc failure handling. v14: The indirect uniform src0 should be scalar, not vector (noticed as odd by robclark, confirmed by cwabbott). Apply Ken's review to initialize s->num_uniforms and friends, skip ttn_channel for dot products, and use the simpler discard_if intrinsic. 
Reviewed-by: Kenneth Graunke (v13) Acked-by: Rob Clark --- src/gallium/auxiliary/Makefile.sources | 1 + src/gallium/auxiliary/nir/tgsi_to_nir.c | 1423 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/nir/tgsi_to_nir.h | 30 + 3 files changed, 1454 insertions(+) diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 09496fa..08e4e4c 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -69,6 +69,7 @@ C_SOURCES := \ indices/u_indices_priv.h \ indices/u_primconvert.c \ indices/u_primconvert.h \ + nir/tgsi_to_nir.c \ os/os_memory_aligned.h \ os/os_memory_debug.h \ os/os_memory_stdc.h \ diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c new file mode 100644 index 0000000..4935f6c --- /dev/null +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -0,0 +1,1423 @@ +/* + * Copyright © 2014-2015 Broadcom + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/ralloc.h" +#include "glsl/nir/nir.h" +#include "glsl/nir/nir_builder.h" +#include "glsl/list.h" +#include "glsl/shader_enums.h" + +#include "nir/tgsi_to_nir.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_scan.h" + +#define SWIZ(X, Y, Z, W) (unsigned[4]){ \ + TGSI_SWIZZLE_##X, \ + TGSI_SWIZZLE_##Y, \ + TGSI_SWIZZLE_##Z, \ + TGSI_SWIZZLE_##W, \ + } + +struct ttn_reg_info { + /** nir register containing this TGSI index. */ + nir_register *reg; + /** Offset (in vec4s) from the start of var for this TGSI index. */ + int offset; +}; + +struct ttn_compile { + union tgsi_full_token *token; + nir_builder build; + struct nir_shader *s; + struct tgsi_shader_info *scan; + + struct ttn_reg_info *output_regs; + struct ttn_reg_info *temp_regs; + nir_ssa_def **imm_defs; + + nir_register *addr_reg; + + /** + * Stack of cf_node_lists where instructions should be pushed as we pop + * back out of the control flow stack. + * + * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else + * instructions should be placed, and if_stack[if_stack_pos - 1] has where + * the next instructions outside of the if/then/else block go. + */ + struct exec_list **if_stack; + unsigned if_stack_pos; + + /** + * Stack of cf_node_lists where instructions should be pushed as we pop + * back out of the control flow stack. + * + * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside + * of the loop. + */ + struct exec_list **loop_stack; + unsigned loop_stack_pos; + + /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. 
*/ + unsigned next_imm; +}; + +#define ttn_swizzle(b, src, x, y, z, w) \ + nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false) +#define ttn_channel(b, src, swiz) \ + nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false) + +static nir_ssa_def * +ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest) +{ + nir_alu_src src; + memset(&src, 0, sizeof(src)); + + if (dest->dest.is_ssa) + src.src = nir_src_for_ssa(&dest->dest.ssa); + else { + assert(!dest->dest.reg.indirect); + src.src = nir_src_for_reg(dest->dest.reg.reg); + src.src.reg.base_offset = dest->dest.reg.base_offset; + } + + for (int i = 0; i < 4; i++) + src.swizzle[i] = i; + + return nir_fmov_alu(b, src, 4); +} + +static void +ttn_emit_declaration(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + struct tgsi_full_declaration *decl = &c->token->FullDeclaration; + unsigned array_size = decl->Range.Last - decl->Range.First + 1; + unsigned file = decl->Declaration.File; + unsigned i; + + if (file == TGSI_FILE_TEMPORARY) { + nir_register *reg; + if (c->scan->indirect_files & (1 << file)) { + reg = nir_local_reg_create(b->impl); + reg->num_components = 4; + reg->num_array_elems = array_size; + + for (i = 0; i < array_size; i++) { + c->temp_regs[decl->Range.First + i].reg = reg; + c->temp_regs[decl->Range.First + i].offset = i; + } + } else { + for (i = 0; i < array_size; i++) { + reg = nir_local_reg_create(b->impl); + reg->num_components = 4; + c->temp_regs[decl->Range.First + i].reg = reg; + c->temp_regs[decl->Range.First + i].offset = 0; + } + } + } else if (file == TGSI_FILE_ADDRESS) { + c->addr_reg = nir_local_reg_create(b->impl); + c->addr_reg->num_components = 4; + } else if (file == TGSI_FILE_SAMPLER) { + /* Nothing to record for samplers. 
*/ + } else { + nir_variable *var; + assert(file == TGSI_FILE_INPUT || + file == TGSI_FILE_OUTPUT || + file == TGSI_FILE_CONSTANT); + + var = rzalloc(b->shader, nir_variable); + var->data.driver_location = decl->Range.First; + + var->type = glsl_vec4_type(); + if (array_size > 1) + var->type = glsl_array_type(var->type, array_size); + + switch (file) { + case TGSI_FILE_INPUT: + var->data.read_only = true; + var->data.mode = nir_var_shader_in; + var->name = ralloc_asprintf(var, "in_%d", decl->Range.First); + + /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_* + * instead, but nothing in NIR core is looking at the value + * currently, and this is less change to drivers. + */ + var->data.location = decl->Semantic.Name; + var->data.index = decl->Semantic.Index; + + /* We definitely need to translate the interpolation field, because + * nir_print will decode it. + */ + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case TGSI_INTERPOLATE_LINEAR: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + break; + } + + exec_list_push_tail(&b->shader->inputs, &var->node); + break; + case TGSI_FILE_OUTPUT: { + /* Since we can't load from outputs in the IR, we make temporaries + * for the outputs and emit stores to the real outputs at the end of + * the shader. 
+ */ + nir_register *reg = nir_local_reg_create(b->impl); + reg->num_components = 4; + if (array_size > 1) + reg->num_array_elems = array_size; + + var->data.mode = nir_var_shader_out; + var->name = ralloc_asprintf(var, "out_%d", decl->Range.First); + + var->data.location = decl->Semantic.Name; + var->data.index = decl->Semantic.Index; + + for (i = 0; i < array_size; i++) { + c->output_regs[decl->Range.First + i].offset = i; + c->output_regs[decl->Range.First + i].reg = reg; + } + + exec_list_push_tail(&b->shader->outputs, &var->node); + } + break; + case TGSI_FILE_CONSTANT: + var->data.mode = nir_var_uniform; + var->name = ralloc_asprintf(var, "uniform_%d", decl->Range.First); + + exec_list_push_tail(&b->shader->uniforms, &var->node); + break; + default: + unreachable("bad declaration file"); + return; + } + + } +} + +static void +ttn_emit_immediate(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate; + nir_load_const_instr *load_const; + int i; + + load_const = nir_load_const_instr_create(b->shader, 4); + c->imm_defs[c->next_imm] = &load_const->def; + c->next_imm++; + + for (i = 0; i < 4; i++) + load_const->value.u[i] = tgsi_imm->u[i].Uint; + + nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr); +} + +static nir_src * +ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect); + +static nir_src +ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, + struct tgsi_ind_register *indirect) +{ + nir_builder *b = &c->build; + nir_src src; + + memset(&src, 0, sizeof(src)); + + switch (file) { + case TGSI_FILE_TEMPORARY: + src.reg.reg = c->temp_regs[index].reg; + src.reg.base_offset = c->temp_regs[index].offset; + if (indirect) + src.reg.indirect = ttn_src_for_indirect(c, indirect); + break; + + case TGSI_FILE_ADDRESS: + src.reg.reg = c->addr_reg; + break; + + case TGSI_FILE_IMMEDIATE: + src = nir_src_for_ssa(c->imm_defs[index]); + 
assert(!indirect); + break; + + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: { + nir_intrinsic_instr *load; + + switch (file) { + case TGSI_FILE_INPUT: + load = nir_intrinsic_instr_create(b->shader, + indirect ? + nir_intrinsic_load_input_indirect : + nir_intrinsic_load_input); + break; + case TGSI_FILE_CONSTANT: + load = nir_intrinsic_instr_create(b->shader, + indirect ? + nir_intrinsic_load_uniform_indirect : + nir_intrinsic_load_uniform); + break; + default: + unreachable("No other load files supported"); + break; + } + + load->num_components = 4; + load->const_index[0] = index; + load->const_index[1] = 1; + if (indirect) { + nir_alu_src indirect_address; + memset(&indirect_address, 0, sizeof(indirect_address)); + indirect_address.src = nir_src_for_reg(c->addr_reg); + for (int i = 0; i < 4; i++) + indirect_address.swizzle[i] = indirect->Swizzle; + load->src[0] = nir_src_for_ssa(nir_imov_alu(b, indirect_address, 1)); + } + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + + src = nir_src_for_ssa(&load->dest.ssa); + break; + } + + default: + unreachable("bad src file"); + } + + + return src; +} + +static nir_src * +ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) +{ + nir_builder *b = &c->build; + nir_alu_src src; + memset(&src, 0, sizeof(src)); + for (int i = 0; i < 4; i++) + src.swizzle[i] = indirect->Swizzle; + src.src = ttn_src_for_file_and_index(c, + indirect->File, + indirect->Index, NULL); + nir_src *result = ralloc(b->shader, nir_src); + *result = nir_src_for_ssa(nir_imov_alu(b, src, 1)); + return result; +} + +static nir_alu_dest +ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) +{ + struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; + nir_alu_dest dest; + + memset(&dest, 0, sizeof(dest)); + + if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { + dest.dest.reg.reg = c->temp_regs[tgsi_dst->Index].reg; + 
dest.dest.reg.base_offset = c->temp_regs[tgsi_dst->Index].offset; + } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { + dest.dest.reg.reg = c->output_regs[tgsi_dst->Index].reg; + dest.dest.reg.base_offset = c->output_regs[tgsi_dst->Index].offset; + } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) { + assert(tgsi_dst->Index == 0); + dest.dest.reg.reg = c->addr_reg; + } + + dest.write_mask = tgsi_dst->WriteMask; + dest.saturate = false; + + if (tgsi_dst->Indirect) + dest.dest.reg.indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); + + return dest; +} + +static nir_ssa_def * +ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) +{ + nir_builder *b = &c->build; + struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register; + unsigned tgsi_opcode = c->token->FullInstruction.Instruction.Opcode; + unsigned tgsi_src_type = tgsi_opcode_infer_src_type(tgsi_opcode); + bool src_is_float = !(tgsi_src_type == TGSI_TYPE_SIGNED || + tgsi_src_type == TGSI_TYPE_UNSIGNED); + nir_alu_src src; + + memset(&src, 0, sizeof(src)); + + if (tgsi_src->File == TGSI_FILE_NULL) { + return nir_imm_float(b, 0.0); + } else if (tgsi_src->File == TGSI_FILE_SAMPLER) { + /* Only the index of the sampler gets used in texturing, and it will + * handle looking that up on its own instead of using the nir_alu_src. + */ + assert(!tgsi_src->Indirect); + return NULL; + } else { + src.src = ttn_src_for_file_and_index(c, + tgsi_src->File, + tgsi_src->Index, + (tgsi_src->Indirect ? 
+ &tgsi_fsrc->Indirect : NULL)); + } + + src.swizzle[0] = tgsi_src->SwizzleX; + src.swizzle[1] = tgsi_src->SwizzleY; + src.swizzle[2] = tgsi_src->SwizzleZ; + src.swizzle[3] = tgsi_src->SwizzleW; + + nir_ssa_def *def = nir_fmov_alu(b, src, 4); + + if (tgsi_src->Absolute) { + if (src_is_float) + def = nir_fabs(b, def); + else + def = nir_iabs(b, def); + } + + if (tgsi_src->Negate) { + if (src_is_float) + def = nir_fneg(b, def); + else + def = nir_ineg(b, def); + } + + return def; +} + +static void +ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + unsigned num_srcs = nir_op_infos[op].num_inputs; + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + unsigned i; + + for (i = 0; i < num_srcs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + instr->dest = dest; + nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); +} + +static void +ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest, + nir_ssa_def *def, unsigned write_mask) +{ + if (!(dest.write_mask & write_mask)) + return; + + nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_imov); + mov->dest = dest; + mov->dest.write_mask &= write_mask; + mov->src[0].src = nir_src_for_ssa(def); + for (unsigned i = def->num_components; i < 4; i++) + mov->src[0].swizzle[i] = def->num_components - 1; + nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr); +} + +static void +ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def) +{ + ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW); +} + +static void +ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0]))); +} + +/* EXP - Approximate Exponential Base 2 + * dst.x = 2^{\lfloor src.x\rfloor} + * dst.y = src.x - \lfloor src.x\rfloor + * dst.z = 2^{src.x} + * dst.w = 1.0 + */ +static void +ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_ssa_def *srcx = ttn_channel(b, src[0], X); + + 
ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), + TGSI_WRITEMASK_X); + ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), + TGSI_WRITEMASK_Y); + ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z); + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W); +} + +/* LOG - Approximate Logarithm Base 2 + * dst.x = \lfloor\log_2{|src.x|}\rfloor + * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} + * dst.z = \log_2{|src.x|} + * dst.w = 1.0 + */ +static void +ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X)); + nir_ssa_def *log2 = nir_flog2(b, abs_srcx); + + ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X); + ttn_move_dest_masked(b, dest, + nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))), + TGSI_WRITEMASK_Y); + ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z); + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W); +} + +/* DST - Distance Vector + * dst.x = 1.0 + * dst.y = src0.y \times src1.y + * dst.z = src0.z + * dst.w = src1.w + */ +static void +ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X); + ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y); + ttn_move_dest_masked(b, dest, nir_fmov(b, src[0]), TGSI_WRITEMASK_Z); + ttn_move_dest_masked(b, dest, nir_fmov(b, src[1]), TGSI_WRITEMASK_W); +} + +/* LIT - Light Coefficients + * dst.x = 1.0 + * dst.y = max(src.x, 0.0) + * dst.z = (src.x > 0.0) ? 
max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 + * dst.w = 1.0 + */ +static void +ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW); + + ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X), + nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y); + + if (dest.write_mask & TGSI_WRITEMASK_Z) { + nir_ssa_def *src0_y = ttn_channel(b, src[0], Y); + nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W), + nir_imm_float(b, 128.0)), + nir_imm_float(b, -128.0)); + nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)), + wclamp); + + ttn_move_dest_masked(b, dest, + nir_bcsel(b, + nir_fge(b, + nir_imm_float(b, 0.0), + ttn_channel(b, src[0], X)), + nir_imm_float(b, 0.0), + pow), + TGSI_WRITEMASK_Z); + } +} + +/* SCS - Sine Cosine + * dst.x = \cos{src.x} + * dst.y = \sin{src.x} + * dst.z = 0.0 + * dst.w = 1.0 + */ +static void +ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)), + TGSI_WRITEMASK_X); + ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)), + TGSI_WRITEMASK_Y); + ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z); + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W); +} + +static void +ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_sge(b, src[1], src[0])); +} + +static void +ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_slt(b, src[1], src[0])); +} + +static void +ttn_clamp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_fmin(b, nir_fmax(b, src[0], src[1]), src[2])); +} + +static void +ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest_masked(b, dest, + nir_fsub(b, + 
nir_fmul(b, + ttn_swizzle(b, src[0], Y, Z, X, X), + ttn_swizzle(b, src[1], Z, X, Y, X)), + nir_fmul(b, + ttn_swizzle(b, src[1], Y, Z, X, X), + ttn_swizzle(b, src[0], Z, X, Y, X))), + TGSI_WRITEMASK_XYZ); + ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W); +} + +static void +ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, + ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]), + src[2]), + X)); +} + +static void +ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1])); +} + +static void +ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1])); +} + +static void +ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1])); +} + +static void +ttn_dph(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_fadd(b, nir_fdot3(b, src[0], src[1]), + ttn_channel(b, src[1], W))); +} + +static void +ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_ffloor(b, nir_fadd(b, src[0], nir_imm_float(b, 0.5)))); +} + +static void +ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_bcsel(b, + nir_flt(b, src[0], nir_imm_float(b, 0.0)), + src[1], src[2])); +} + +static void +ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + ttn_move_dest(b, dest, nir_bcsel(b, + nir_ine(b, src[0], nir_imm_int(b, 0)), + src[1], src[2])); +} + +static void +ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr); +} + +static void +ttn_kill_if(nir_builder 
*b, nir_op op, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_ssa_def *cmp = nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))); + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if); + discard->src[0] = nir_src_for_ssa(cmp); + nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr); +} + +static void +ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint) +{ + nir_builder *b = &c->build; + + /* Save the outside-of-the-if-statement node list. */ + c->if_stack[c->if_stack_pos] = b->cf_node_list; + c->if_stack_pos++; + + src = ttn_channel(b, src, X); + + nir_if *if_stmt = nir_if_create(b->shader); + if (is_uint) { + if_stmt->condition = nir_src_for_ssa(nir_ine(b, src, nir_imm_int(b, 0))); + } else { + if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0))); + } + nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node); + + nir_builder_insert_after_cf_list(b, &if_stmt->then_list); + + c->if_stack[c->if_stack_pos] = &if_stmt->else_list; + c->if_stack_pos++; +} + +static void +ttn_else(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + + nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]); +} + +static void +ttn_endif(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + + c->if_stack_pos -= 2; + nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]); +} + +static void +ttn_bgnloop(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + + /* Save the outside-of-the-loop node list. 
*/ + c->loop_stack[c->loop_stack_pos] = b->cf_node_list; + c->loop_stack_pos++; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node); + + nir_builder_insert_after_cf_list(b, &loop->body); +} + +static void +ttn_cont(nir_builder *b) +{ + nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue); + nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); +} + +static void +ttn_brk(nir_builder *b) +{ + nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break); + nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); +} + +static void +ttn_endloop(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + + c->loop_stack_pos--; + nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]); +} + +static void +ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_builder *b = &c->build; + struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; + nir_tex_instr *instr; + nir_texop op; + unsigned num_srcs; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + op = nir_texop_tex; + num_srcs = 1; + break; + case TGSI_OPCODE_TXP: + op = nir_texop_tex; + num_srcs = 2; + break; + case TGSI_OPCODE_TXB: + op = nir_texop_txb; + num_srcs = 2; + break; + case TGSI_OPCODE_TXL: + op = nir_texop_txl; + num_srcs = 2; + break; + case TGSI_OPCODE_TXF: + op = nir_texop_txf; + num_srcs = 1; + break; + case TGSI_OPCODE_TXD: + op = nir_texop_txd; + num_srcs = 3; + break; + default: + fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode); + abort(); + } + + if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 
+ tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { + num_srcs++; + } + + instr = nir_tex_instr_create(b->shader, num_srcs); + instr->op = op; + + switch (tgsi_inst->Texture.Texture) { + case TGSI_TEXTURE_1D: + instr->sampler_dim = GLSL_SAMPLER_DIM_1D; + break; + case TGSI_TEXTURE_1D_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_1D; + instr->is_array = true; + break; + case TGSI_TEXTURE_SHADOW1D: + instr->sampler_dim = GLSL_SAMPLER_DIM_1D; + instr->is_shadow = true; + break; + case TGSI_TEXTURE_SHADOW1D_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_1D; + instr->is_shadow = true; + instr->is_array = true; + break; + case TGSI_TEXTURE_2D: + instr->sampler_dim = GLSL_SAMPLER_DIM_2D; + break; + case TGSI_TEXTURE_2D_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_2D; + instr->is_array = true; + break; + case TGSI_TEXTURE_2D_MSAA: + instr->sampler_dim = GLSL_SAMPLER_DIM_MS; + break; + case TGSI_TEXTURE_2D_ARRAY_MSAA: + instr->sampler_dim = GLSL_SAMPLER_DIM_MS; + instr->is_array = true; + break; + case TGSI_TEXTURE_SHADOW2D: + instr->sampler_dim = GLSL_SAMPLER_DIM_2D; + instr->is_shadow = true; + break; + case TGSI_TEXTURE_SHADOW2D_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_2D; + instr->is_shadow = true; + instr->is_array = true; + break; + case TGSI_TEXTURE_3D: + instr->sampler_dim = GLSL_SAMPLER_DIM_3D; + break; + case TGSI_TEXTURE_CUBE: + instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE; + break; + case TGSI_TEXTURE_CUBE_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE; + instr->is_array = true; + break; + case TGSI_TEXTURE_SHADOWCUBE: + instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE; + instr->is_shadow = true; + break; + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE; + instr->is_shadow = true; + instr->is_array = true; + break; + case TGSI_TEXTURE_RECT: + instr->sampler_dim = GLSL_SAMPLER_DIM_RECT; + break; + case TGSI_TEXTURE_SHADOWRECT: + instr->sampler_dim = GLSL_SAMPLER_DIM_RECT; + instr->is_shadow = true; + break; + 
default: + fprintf(stderr, "Unknown TGSI texture target %d\n", + tgsi_inst->Texture.Texture); + abort(); + } + + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + instr->coord_components = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + instr->coord_components = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + instr->coord_components = 3; + break; + } + + if (instr->is_array) + instr->coord_components++; + + assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER); + instr->sampler_index = tgsi_inst->Src[1].Register.Index; + + unsigned src_number = 0; + + if (tgsi_inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + instr->src[src_number].src = + nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W), + instr->coord_components, false)); + instr->src[src_number].src_type = nir_tex_src_coord; + src_number++; + } + + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + instr->src[src_number].src_type = nir_tex_src_projector; + src_number++; + } + + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) { + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + instr->src[src_number].src_type = nir_tex_src_bias; + src_number++; + } + + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) { + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + + if (instr->is_shadow) { + if (instr->coord_components < 3) + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); + else + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + + instr->src[src_number].src_type = nir_tex_src_comparitor; + src_number++; + } + + assert(src_number == num_srcs); + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 
NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr); + + /* Resolve the writemask on the texture op. */ + ttn_move_dest(b, dest, &instr->dest.ssa); +} + +static const nir_op op_trans[TGSI_OPCODE_LAST] = { + [TGSI_OPCODE_ARL] = 0, + [TGSI_OPCODE_MOV] = nir_op_fmov, + [TGSI_OPCODE_LIT] = 0, + [TGSI_OPCODE_RCP] = nir_op_frcp, + [TGSI_OPCODE_RSQ] = nir_op_frsq, + [TGSI_OPCODE_EXP] = 0, + [TGSI_OPCODE_LOG] = 0, + [TGSI_OPCODE_MUL] = nir_op_fmul, + [TGSI_OPCODE_ADD] = nir_op_fadd, + [TGSI_OPCODE_DP3] = 0, + [TGSI_OPCODE_DP4] = 0, + [TGSI_OPCODE_DST] = 0, + [TGSI_OPCODE_MIN] = nir_op_fmin, + [TGSI_OPCODE_MAX] = nir_op_fmax, + [TGSI_OPCODE_SLT] = nir_op_slt, + [TGSI_OPCODE_SGE] = nir_op_sge, + [TGSI_OPCODE_MAD] = nir_op_ffma, + [TGSI_OPCODE_SUB] = nir_op_fsub, + [TGSI_OPCODE_LRP] = 0, + [TGSI_OPCODE_SQRT] = nir_op_fsqrt, + [TGSI_OPCODE_DP2A] = 0, + [TGSI_OPCODE_FRC] = nir_op_ffract, + [TGSI_OPCODE_CLAMP] = 0, + [TGSI_OPCODE_FLR] = nir_op_ffloor, + [TGSI_OPCODE_ROUND] = nir_op_fround_even, + [TGSI_OPCODE_EX2] = nir_op_fexp2, + [TGSI_OPCODE_LG2] = nir_op_flog2, + [TGSI_OPCODE_POW] = nir_op_fpow, + [TGSI_OPCODE_XPD] = 0, + [TGSI_OPCODE_ABS] = nir_op_fabs, + [TGSI_OPCODE_DPH] = 0, + [TGSI_OPCODE_COS] = nir_op_fcos, + [TGSI_OPCODE_DDX] = nir_op_fddx, + [TGSI_OPCODE_DDY] = nir_op_fddy, + [TGSI_OPCODE_KILL] = 0, + [TGSI_OPCODE_PK2H] = 0, /* XXX */ + [TGSI_OPCODE_PK2US] = 0, /* XXX */ + [TGSI_OPCODE_PK4B] = 0, /* XXX */ + [TGSI_OPCODE_PK4UB] = 0, /* XXX */ + [TGSI_OPCODE_SEQ] = nir_op_seq, + [TGSI_OPCODE_SGT] = 0, + [TGSI_OPCODE_SIN] = nir_op_fsin, + [TGSI_OPCODE_SLE] = 0, + [TGSI_OPCODE_TEX] = 0, + [TGSI_OPCODE_TXD] = 0, + [TGSI_OPCODE_TXP] = 0, + [TGSI_OPCODE_UP2H] = 0, /* XXX */ + [TGSI_OPCODE_UP2US] = 0, /* XXX */ + [TGSI_OPCODE_UP4B] = 0, /* XXX */ + [TGSI_OPCODE_UP4UB] = 0, /* XXX */ + [TGSI_OPCODE_ARR] = 0, + + /* No function calls, yet. 
*/ + [TGSI_OPCODE_CAL] = 0, /* XXX */ + [TGSI_OPCODE_RET] = 0, /* XXX */ + + [TGSI_OPCODE_SSG] = nir_op_fsign, + [TGSI_OPCODE_CMP] = 0, + [TGSI_OPCODE_SCS] = 0, + [TGSI_OPCODE_TXB] = 0, + [TGSI_OPCODE_DIV] = nir_op_fdiv, + [TGSI_OPCODE_DP2] = 0, + [TGSI_OPCODE_DP2A] = 0, + [TGSI_OPCODE_TXL] = 0, + + [TGSI_OPCODE_BRK] = 0, + [TGSI_OPCODE_IF] = 0, + [TGSI_OPCODE_UIF] = 0, + [TGSI_OPCODE_ELSE] = 0, + [TGSI_OPCODE_ENDIF] = 0, + + [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine, + [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine, + + [TGSI_OPCODE_PUSHA] = 0, /* XXX */ + [TGSI_OPCODE_POPA] = 0, /* XXX */ + + [TGSI_OPCODE_CEIL] = nir_op_fceil, + [TGSI_OPCODE_I2F] = nir_op_i2f, + [TGSI_OPCODE_NOT] = nir_op_inot, + [TGSI_OPCODE_TRUNC] = nir_op_ftrunc, + [TGSI_OPCODE_SHL] = nir_op_ishl, + [TGSI_OPCODE_AND] = nir_op_iand, + [TGSI_OPCODE_OR] = nir_op_ior, + [TGSI_OPCODE_MOD] = nir_op_fmod, + [TGSI_OPCODE_XOR] = nir_op_ixor, + [TGSI_OPCODE_SAD] = 0, /* XXX */ + [TGSI_OPCODE_TXF] = 0, + [TGSI_OPCODE_TXQ] = 0, + + [TGSI_OPCODE_CONT] = 0, + + [TGSI_OPCODE_EMIT] = 0, /* XXX */ + [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */ + + [TGSI_OPCODE_BGNLOOP] = 0, + [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */ + [TGSI_OPCODE_ENDLOOP] = 0, + [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */ + + [TGSI_OPCODE_TXQ_LZ] = 0, + [TGSI_OPCODE_NOP] = 0, + [TGSI_OPCODE_FSEQ] = nir_op_feq, + [TGSI_OPCODE_FSGE] = nir_op_fge, + [TGSI_OPCODE_FSLT] = nir_op_flt, + [TGSI_OPCODE_FSNE] = nir_op_fne, + + /* No control flow yet */ + [TGSI_OPCODE_CALLNZ] = 0, /* XXX */ + [TGSI_OPCODE_BREAKC] = 0, /* not emitted by glsl_to_tgsi.cpp */ + + [TGSI_OPCODE_KILL_IF] = 0, + + [TGSI_OPCODE_END] = 0, + + [TGSI_OPCODE_F2I] = nir_op_f2i, + [TGSI_OPCODE_IDIV] = nir_op_idiv, + [TGSI_OPCODE_IMAX] = nir_op_imax, + [TGSI_OPCODE_IMIN] = nir_op_imin, + [TGSI_OPCODE_INEG] = nir_op_ineg, + [TGSI_OPCODE_ISGE] = nir_op_ige, + [TGSI_OPCODE_ISHR] = nir_op_ishr, + [TGSI_OPCODE_ISLT] = nir_op_ilt, + [TGSI_OPCODE_F2U] = nir_op_f2u, + 
[TGSI_OPCODE_U2F] = nir_op_u2f, + [TGSI_OPCODE_UADD] = nir_op_iadd, + [TGSI_OPCODE_UDIV] = nir_op_udiv, + [TGSI_OPCODE_UMAD] = 0, /* XXX */ + [TGSI_OPCODE_UMAX] = nir_op_umax, + [TGSI_OPCODE_UMIN] = nir_op_umin, + [TGSI_OPCODE_UMOD] = nir_op_umod, + [TGSI_OPCODE_UMUL] = nir_op_imul, + [TGSI_OPCODE_USEQ] = nir_op_ieq, + [TGSI_OPCODE_USGE] = nir_op_uge, + [TGSI_OPCODE_USHR] = nir_op_ushr, + [TGSI_OPCODE_USLT] = nir_op_ult, + [TGSI_OPCODE_USNE] = nir_op_ine, + + [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */ + [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */ + [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */ + [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */ + + /* XXX: SAMPLE opcodes */ + + [TGSI_OPCODE_UARL] = nir_op_imov, + [TGSI_OPCODE_UCMP] = 0, + [TGSI_OPCODE_IABS] = nir_op_iabs, + [TGSI_OPCODE_ISSG] = nir_op_isign, + + /* XXX: atomics */ + + [TGSI_OPCODE_TEX2] = 0, + [TGSI_OPCODE_TXB2] = 0, + [TGSI_OPCODE_TXL2] = 0, + + [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high, + [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high, + + [TGSI_OPCODE_TG4] = 0, + [TGSI_OPCODE_LODQ] = 0, /* XXX */ + + [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract, + [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract, + [TGSI_OPCODE_BFI] = nir_op_bitfield_insert, + [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse, + [TGSI_OPCODE_POPC] = nir_op_bit_count, + [TGSI_OPCODE_LSB] = nir_op_find_lsb, + [TGSI_OPCODE_IMSB] = nir_op_ifind_msb, + [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */ + + [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */ + [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */ + [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */ +}; + +static void +ttn_emit_instruction(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; + unsigned i; + unsigned tgsi_op = tgsi_inst->Instruction.Opcode; + + if (tgsi_op == TGSI_OPCODE_END) + return; + + nir_ssa_def 
*src[TGSI_FULL_MAX_SRC_REGISTERS]; + for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) { + src[i] = ttn_get_src(c, &tgsi_inst->Src[i]); + } + nir_alu_dest dest = ttn_get_dest(c, &tgsi_inst->Dst[0]); + + switch (tgsi_op) { + case TGSI_OPCODE_RSQ: + ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_SQRT: + ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_RCP: + ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_EX2: + ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_LG2: + ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_POW: + ttn_move_dest(b, dest, nir_fpow(b, + ttn_channel(b, src[0], X), + ttn_channel(b, src[1], X))); + break; + + case TGSI_OPCODE_COS: + ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_SIN: + ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X))); + break; + + case TGSI_OPCODE_ARL: + ttn_arl(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_EXP: + ttn_exp(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_LOG: + ttn_log(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DST: + ttn_dst(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_LIT: + ttn_lit(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_CLAMP: + ttn_clamp(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_XPD: + ttn_xpd(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DP2: + ttn_dp2(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DP3: + ttn_dp3(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DP4: + ttn_dp4(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DP2A: + ttn_dp2a(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_DPH: + ttn_dph(b, op_trans[tgsi_op], 
dest, src); + break; + + case TGSI_OPCODE_LRP: + ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0])); + break; + + case TGSI_OPCODE_KILL: + ttn_kill(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_ARR: + ttn_arr(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_CMP: + ttn_cmp(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_UCMP: + ttn_ucmp(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_SCS: + ttn_scs(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_SGT: + ttn_sgt(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_SLE: + ttn_sle(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_KILL_IF: + ttn_kill_if(b, op_trans[tgsi_op], dest, src); + break; + + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXL: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + case TGSI_OPCODE_TXQ: + case TGSI_OPCODE_TXL2: + case TGSI_OPCODE_TXB2: + case TGSI_OPCODE_TXQ_LZ: + case TGSI_OPCODE_TXF: + case TGSI_OPCODE_TG4: + ttn_tex(c, dest, src); + break; + + case TGSI_OPCODE_NOP: + break; + + case TGSI_OPCODE_IF: + ttn_if(c, src[0], false); + break; + + case TGSI_OPCODE_UIF: + ttn_if(c, src[0], true); + break; + + case TGSI_OPCODE_ELSE: + ttn_else(c); + break; + + case TGSI_OPCODE_ENDIF: + ttn_endif(c); + break; + + case TGSI_OPCODE_BGNLOOP: + ttn_bgnloop(c); + break; + + case TGSI_OPCODE_BRK: + ttn_brk(b); + break; + + case TGSI_OPCODE_CONT: + ttn_cont(b); + break; + + case TGSI_OPCODE_ENDLOOP: + ttn_endloop(c); + break; + + default: + if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) { + ttn_alu(b, op_trans[tgsi_op], dest, src); + } else { + fprintf(stderr, "unknown TGSI opcode: %s\n", + tgsi_get_opcode_name(tgsi_op)); + abort(); + } + break; + } + + if (tgsi_inst->Instruction.Saturate) { + assert(tgsi_inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + assert(!dest.dest.is_ssa); + ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest))); + } +} + 
+/** + * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output + * variables at the end of the shader. + * + * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are + * written, because there's no output load intrinsic, which means we couldn't + * handle writemasks. + */ +static void +ttn_add_output_stores(struct ttn_compile *c) +{ + nir_builder *b = &c->build; + + foreach_list_typed(nir_variable, var, node, &b->shader->outputs) { + unsigned array_len = MAX2(glsl_get_length(var->type), 1); + unsigned i; + + for (i = 0; i < array_len; i++) { + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); + store->num_components = 4; + store->const_index[0] = var->data.driver_location + i; + store->const_index[1] = 1; + store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg; + nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); + } + } +} + +struct nir_shader * +tgsi_to_nir(const void *tgsi_tokens, + const nir_shader_compiler_options *options) +{ + struct tgsi_parse_context parser; + struct tgsi_shader_info scan; + struct ttn_compile *c; + struct nir_shader *s; + int ret; + + c = rzalloc(NULL, struct ttn_compile); + s = nir_shader_create(NULL, options); + + nir_function *func = nir_function_create(s, "main"); + nir_function_overload *overload = nir_function_overload_create(func); + nir_function_impl *impl = nir_function_impl_create(overload); + + nir_builder_init(&c->build, impl); + nir_builder_insert_after_cf_list(&c->build, &impl->body); + + tgsi_scan_shader(tgsi_tokens, &scan); + c->scan = &scan; + + s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1; + s->num_uniforms = scan.file_max[TGSI_FILE_CONSTANT] + 1; + s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; + + c->output_regs = rzalloc_array(c, struct ttn_reg_info, + scan.file_max[TGSI_FILE_OUTPUT] + 1); + c->temp_regs = rzalloc_array(c, struct ttn_reg_info, + scan.file_max[TGSI_FILE_TEMPORARY] + 1); + 
c->imm_defs = rzalloc_array(c, nir_ssa_def *, + scan.file_max[TGSI_FILE_IMMEDIATE] + 1); + + c->if_stack = rzalloc_array(c, struct exec_list *, + (scan.opcode_count[TGSI_OPCODE_IF] + + scan.opcode_count[TGSI_OPCODE_UIF]) * 2); + c->loop_stack = rzalloc_array(c, struct exec_list *, + scan.opcode_count[TGSI_OPCODE_BGNLOOP]); + + ret = tgsi_parse_init(&parser, tgsi_tokens); + assert(ret == TGSI_PARSE_OK); + + while (!tgsi_parse_end_of_tokens(&parser)) { + tgsi_parse_token(&parser); + c->token = &parser.FullToken; + + switch (parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + ttn_emit_declaration(c); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + ttn_emit_instruction(c); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + ttn_emit_immediate(c); + break; + } + } + + tgsi_parse_free(&parser); + + ttn_add_output_stores(c); + + ralloc_free(c); + return s; +} diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.h b/src/gallium/auxiliary/nir/tgsi_to_nir.h new file mode 100644 index 0000000..687348a --- /dev/null +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h @@ -0,0 +1,30 @@ +/* + * Copyright ? 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "glsl/nir/nir.h" + +struct nir_shader_compiler_options *options; + +struct nir_shader * +tgsi_to_nir(const void *tgsi_tokens, + const struct nir_shader_compiler_options *options); From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): mesa: Make a shared header for 3D pipeline enum / #defines. Message-ID: <20150401180224.B8FA476332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a3a07d46d1a8e89136669dd4bb242c7bd5d10015 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3a07d46d1a8e89136669dd4bb242c7bd5d10015 Author: Eric Anholt Date: Wed Mar 25 12:58:51 2015 -0700 mesa: Make a shared header for 3D pipeline enum / #defines. NIR uses these enums/#defines in nir_variables and associated intrinsics, but I want to be able to use them from TGSI->NIR and NIR->TGSI. Otherwise, we had to pull in all of mtypes.h. This doesn't cover all of the enums we might want from a shared compiler core (like varying slots or vert attribs), but it at least covers what I need at the moment (system values and interp qualifiers). v2: Move to src/glsl since util/ is really vague. Include in Makefile.am list. Use plain bitshifts and stdint types instead of undefined BITFIELD64_BIT. v3: Rename to shader_enums.h. Move it into Makefile.sources. 
Reviewed-by: Kenneth Graunke (v2, with recommendation to rename) --- src/glsl/Makefile.sources | 3 +- src/glsl/shader_enums.h | 170 +++++++++++++++++++++++++++++++++++++++++++++ src/mesa/main/mtypes.h | 142 +------------------------------------ 3 files changed, 173 insertions(+), 142 deletions(-) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index c3b63d1..fa5d991 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -185,7 +185,8 @@ LIBGLSL_FILES = \ opt_vectorize.cpp \ program.h \ s_expression.cpp \ - s_expression.h + s_expression.h \ + shader_enums.h # glsl_compiler diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h new file mode 100644 index 0000000..0e08bd3 --- /dev/null +++ b/src/glsl/shader_enums.h @@ -0,0 +1,170 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef PIPELINE_H +#define PIPELINE_H + +/** + * Bitflags for system values. + */ +#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID) +#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS) +#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN) +/** + * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. If a NIR variable's mode is nir_var_system_value, it + * will be one of these values. + */ +typedef enum +{ + /** + * \name Vertex shader system values + */ + /*@{*/ + /** + * OpenGL-style vertex ID. + * + * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the + * OpenGL 3.3 core profile spec says: + * + * "gl_VertexID holds the integer index i implicitly passed by + * DrawArrays or one of the other drawing commands defined in section + * 2.8.3." + * + * Section 2.8.3 (Drawing Commands) of the same spec says: + * + * "The commands....are equivalent to the commands with the same base + * name (without the BaseVertex suffix), except that the ith element + * transferred by the corresponding draw call will be taken from + * element indices[i] + basevertex of each enabled array." + * + * Additionally, the overview in the GL_ARB_shader_draw_parameters spec + * says: + * + * "In unextended GL, vertex shaders have inputs named gl_VertexID and + * gl_InstanceID, which contain, respectively the index of the vertex + * and instance. The value of gl_VertexID is the implicitly passed + * index of the vertex being processed, which includes the value of + * baseVertex, for those commands that accept it." + * + * gl_VertexID gets basevertex added in. 
This differs from DirectX where + * SV_VertexID does \b not get basevertex added in. + * + * \note + * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be + * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus + * \c SYSTEM_VALUE_BASE_VERTEX. + * + * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID, + + /** + * Instanced ID as supplied to gl_InstanceID + * + * Values assigned to gl_InstanceID always begin with zero, regardless of + * the value of baseinstance. + * + * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec + * says: + * + * "gl_InstanceID holds the integer instance number of the current + * primitive in an instanced draw call (see section 10.5)." + * + * Through a big chain of pseudocode, section 10.5 describes that + * baseinstance is not counted by gl_InstanceID. In that section, notice + * + * "If an enabled vertex attribute array is instanced (it has a + * non-zero divisor as specified by VertexAttribDivisor), the element + * index that is transferred to the GL, for all vertices, is given by + * + * floor(instance/divisor) + baseinstance + * + * If an array corresponding to an attribute required by a vertex + * shader is not enabled, then the corresponding element is taken from + * the current attribute state (see section 10.2)." + * + * Note that baseinstance is \b not included in the value of instance. + */ + SYSTEM_VALUE_INSTANCE_ID, + + /** + * DirectX-style vertex ID. + * + * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include + * the value of basevertex. + * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX + */ + SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, + + /** + * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar + * functions. 
+ * + * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE + */ + SYSTEM_VALUE_BASE_VERTEX, + /*@}*/ + + /** + * \name Geometry shader system values + */ + /*@{*/ + SYSTEM_VALUE_INVOCATION_ID, + /*@}*/ + + /** + * \name Fragment shader system values + */ + /*@{*/ + SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */ + SYSTEM_VALUE_SAMPLE_ID, + SYSTEM_VALUE_SAMPLE_POS, + SYSTEM_VALUE_SAMPLE_MASK_IN, + /*@}*/ + + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + +/** + * The possible interpolation qualifiers that can be applied to a fragment + * shader input in GLSL. + * + * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the + * gl_fragment_program data structure to 0 causes the default behavior. + */ +enum glsl_interp_qualifier +{ + INTERP_QUALIFIER_NONE = 0, + INTERP_QUALIFIER_SMOOTH, + INTERP_QUALIFIER_FLAT, + INTERP_QUALIFIER_NOPERSPECTIVE, + INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */ +}; + + +#endif /* PIPELINE_H */ diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f718768..c1e5dd3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -42,6 +42,7 @@ #include "main/config.h" #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ +#include "glsl/shader_enums.h" #include "util/simple_list.h" /* struct simple_node */ #include "main/formats.h" /* MESA_FORMAT_COUNT */ @@ -280,13 +281,6 @@ typedef enum /*@}*/ /** - * Bitflags for system values. - */ -#define SYSTEM_BIT_SAMPLE_ID BITFIELD64_BIT(SYSTEM_VALUE_SAMPLE_ID) -#define SYSTEM_BIT_SAMPLE_POS BITFIELD64_BIT(SYSTEM_VALUE_SAMPLE_POS) -#define SYSTEM_BIT_SAMPLE_MASK_IN BITFIELD64_BIT(SYSTEM_VALUE_SAMPLE_MASK_IN) - -/** * Determine if the given gl_varying_slot appears in the fragment shader. */ static inline GLboolean @@ -2082,140 +2076,6 @@ typedef enum /** - * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be - * one of these values. 
- */ -typedef enum -{ - /** - * \name Vertex shader system values - */ - /*@{*/ - /** - * OpenGL-style vertex ID. - * - * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the - * OpenGL 3.3 core profile spec says: - * - * "gl_VertexID holds the integer index i implicitly passed by - * DrawArrays or one of the other drawing commands defined in section - * 2.8.3." - * - * Section 2.8.3 (Drawing Commands) of the same spec says: - * - * "The commands....are equivalent to the commands with the same base - * name (without the BaseVertex suffix), except that the ith element - * transferred by the corresponding draw call will be taken from - * element indices[i] + basevertex of each enabled array." - * - * Additionally, the overview in the GL_ARB_shader_draw_parameters spec - * says: - * - * "In unextended GL, vertex shaders have inputs named gl_VertexID and - * gl_InstanceID, which contain, respectively the index of the vertex - * and instance. The value of gl_VertexID is the implicitly passed - * index of the vertex being processed, which includes the value of - * baseVertex, for those commands that accept it." - * - * gl_VertexID gets basevertex added in. This differs from DirectX where - * SV_VertexID does \b not get basevertex added in. - * - * \note - * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be - * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus - * \c SYSTEM_VALUE_BASE_VERTEX. - * - * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID, - - /** - * Instanced ID as supplied to gl_InstanceID - * - * Values assigned to gl_InstanceID always begin with zero, regardless of - * the value of baseinstance. - * - * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec - * says: - * - * "gl_InstanceID holds the integer instance number of the current - * primitive in an instanced draw call (see section 10.5)." 
- * - * Through a big chain of pseudocode, section 10.5 describes that - * baseinstance is not counted by gl_InstanceID. In that section, notice - * - * "If an enabled vertex attribute array is instanced (it has a - * non-zero divisor as specified by VertexAttribDivisor), the element - * index that is transferred to the GL, for all vertices, is given by - * - * floor(instance/divisor) + baseinstance - * - * If an array corresponding to an attribute required by a vertex - * shader is not enabled, then the corresponding element is taken from - * the current attribute state (see section 10.2)." - * - * Note that baseinstance is \b not included in the value of instance. - */ - SYSTEM_VALUE_INSTANCE_ID, - - /** - * DirectX-style vertex ID. - * - * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include - * the value of basevertex. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX - */ - SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, - - /** - * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar - * functions. - * - * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE - */ - SYSTEM_VALUE_BASE_VERTEX, - /*@}*/ - - /** - * \name Geometry shader system values - */ - /*@{*/ - SYSTEM_VALUE_INVOCATION_ID, - /*@}*/ - - /** - * \name Fragment shader system values - */ - /*@{*/ - SYSTEM_VALUE_FRONT_FACE, /**< (not done yet) */ - SYSTEM_VALUE_SAMPLE_ID, - SYSTEM_VALUE_SAMPLE_POS, - SYSTEM_VALUE_SAMPLE_MASK_IN, - /*@}*/ - - SYSTEM_VALUE_MAX /**< Number of values */ -} gl_system_value; - - -/** - * The possible interpolation qualifiers that can be applied to a fragment - * shader input in GLSL. - * - * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the - * gl_fragment_program data structure to 0 causes the default behavior. 
- */ -enum glsl_interp_qualifier -{ - INTERP_QUALIFIER_NONE = 0, - INTERP_QUALIFIER_SMOOTH, - INTERP_QUALIFIER_FLAT, - INTERP_QUALIFIER_NOPERSPECTIVE, - INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */ -}; - - -/** * \brief Layout qualifiers for gl_FragDepth. * * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): vc4: Convert to consuming NIR. Message-ID: <20150401180224.D61BF76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 73e2d4837d7e4611f31532ab0ccc14369341e0cb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=73e2d4837d7e4611f31532ab0ccc14369341e0cb Author: Eric Anholt Date: Thu Oct 30 12:51:47 2014 -0700 vc4: Convert to consuming NIR. NIR brings us better optimization than I would have bothered to write within the driver, developers sharing future optimization work, and the ability to share device-specific lowering code that we and other GLES2-level drivers need. total uniforms in shared programs: 13421 -> 13422 (0.01%) uniforms in affected programs: 62 -> 63 (1.61%) total instructions in shared programs: 39961 -> 39707 (-0.64%) instructions in affected programs: 15494 -> 15240 (-1.64%) v2: Add missing imov support, and assert that there are no dest saturates. v3: Rebase on the target-specific algebraic series. v4: Rebase on gallium-includes-from-NIR changes in master. v5: Rebase on variables being in lists instead of hash tables. 
v6: Squash in intermediate changes that used the NIR-to-TGSI pass (which I'm not committing) --- src/gallium/drivers/vc4/vc4_program.c | 1398 ++++++++++++++++----------------- src/gallium/drivers/vc4/vc4_qir.c | 3 + src/gallium/drivers/vc4/vc4_qir.h | 23 +- src/gallium/drivers/vc4/vc4_screen.c | 2 + src/gallium/drivers/vc4/vc4_screen.h | 1 + 5 files changed, 707 insertions(+), 720 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=73e2d4837d7e4611f31532ab0ccc14369341e0cb From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): vc4: Add shader-db dumping of NIR instruction count. Message-ID: <20150401180224.DF98876332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 26261bca2137eb1ca57e53f4efb95bcb3f1419df URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26261bca2137eb1ca57e53f4efb95bcb3f1419df Author: Eric Anholt Date: Fri Feb 20 00:31:51 2015 -0800 vc4: Add shader-db dumping of NIR instruction count. I was previously using temporary disables of VC4 optimization to show the benefits of improved NIR optimization, but this can get me quick and dirty numbers for NIR-only improvements without having to add hacks to disable VC4's code (disabling of which might hide ways that the NIR changes would hurt actual VC4 codegen). 
--- src/gallium/drivers/vc4/vc4_program.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1b87fe4..5ed2165 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1994,6 +1994,28 @@ static const nir_shader_compiler_options nir_options = { .lower_negate = true, }; +static bool +count_nir_instrs_in_block(nir_block *block, void *state) +{ + int *count = (int *) state; + nir_foreach_instr(block, instr) { + *count = *count + 1; + } + return true; +} + +static int +count_nir_instrs(nir_shader *nir) +{ + int count = 0; + nir_foreach_overload(nir, overload) { + if (!overload->impl) + continue; + nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); + } + return count; +} + static struct vc4_compile * vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, struct vc4_key *key) @@ -2064,6 +2086,13 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, nir_convert_from_ssa(c->s); + if (vc4_debug & VC4_DEBUG_SHADERDB) { + fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n", + qir_get_stage_name(c->stage), + c->program_id, c->variant_id, + count_nir_instrs(c->s)); + } + if (vc4_debug & VC4_DEBUG_NIR) { fprintf(stderr, "%s prog %d/%d NIR:\n", qir_get_stage_name(c->stage), From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): nir: Recognize a pattern for doing b2f without the opcode. Message-ID: <20150401180224.EBE0D76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6e8d4a2f8043a3a7a28175326e63770aa9511ee7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6e8d4a2f8043a3a7a28175326e63770aa9511ee7 Author: Eric Anholt Date: Sun Mar 29 22:59:39 2015 -0700 nir: Recognize a pattern for doing b2f without the opcode. 
Since we have patterns based on b2f, generate them if we see the b2f equivalent using an iand. This is common when generating NIR from TGSI. Reviewed-by: Connor Abbott --- src/glsl/nir/nir_opt_algebraic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 66b456d..301d7a8 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -99,6 +99,7 @@ optimizations = [ # Emulating booleans (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), + (('iand', 'a@bool', 1.0), ('b2f', a)), # Comparison with the same args. Note that these are not done for # the float versions because NaN always returns false on float # inequalities. From anholt at kemper.freedesktop.org Wed Apr 1 18:02:24 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 1 Apr 2015 11:02:24 -0700 (PDT) Subject: Mesa (master): nir: Recognize a pattern of bool frobbing from TGSI KILL_IF. Message-ID: <20150401180225.003F276332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 15b03b7964fc2c3c52e9f384815b76957f557878 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15b03b7964fc2c3c52e9f384815b76957f557878 Author: Eric Anholt Date: Sun Mar 29 23:04:21 2015 -0700 nir: Recognize a pattern of bool frobbing from TGSI KILL_IF. TGSI's conditional discards take float arg and negate it, so GLSL to TGSI generates a b2f and negates that value. Only, in NIR we want a proper bool once again, so we compare with 0. This is a lot of pointless extra instructions. 
total instructions in shared programs: 39735 -> 39702 (-0.08%) instructions in affected programs: 1342 -> 1309 (-2.46%) Reviewed-by: Connor Abbott --- src/glsl/nir/nir_opt_algebraic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 301d7a8..190bd91 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -100,6 +100,8 @@ optimizations = [ (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), (('iand', 'a@bool', 1.0), ('b2f', a)), + (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. # Comparison with the same args. Note that these are not done for # the float versions because NaN always returns false on float # inequalities. From brianp at kemper.freedesktop.org Wed Apr 1 18:05:42 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 1 Apr 2015 11:05:42 -0700 (PDT) Subject: Mesa (master): swrast: remove unneeded #include of colormac.h Message-ID: <20150401180542.509F776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8ac9407a835ee892d96f326b0c56967046cbb982 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ac9407a835ee892d96f326b0c56967046cbb982 Author: Brian Paul Date: Wed Apr 1 09:45:31 2015 -0600 swrast: remove unneeded #include of colormac.h Acked-by: Matt Turner Reviewed-by: Mark Janes --- src/mesa/swrast/s_aatriangle.c | 1 - src/mesa/swrast/s_alpha.c | 1 - src/mesa/swrast/s_atifragshader.c | 1 - src/mesa/swrast/s_context.c | 1 - src/mesa/swrast/s_copypix.c | 1 - src/mesa/swrast/s_feedback.c | 1 - src/mesa/swrast/s_fog.c | 1 - src/mesa/swrast/s_fragprog.c | 2 +- src/mesa/swrast/s_lines.c | 1 - src/mesa/swrast/s_points.c | 1 - src/mesa/swrast/s_span.c | 1 - src/mesa/swrast/s_texcombine.c | 2 +- src/mesa/swrast/s_texfetch.c | 1 - 
src/mesa/swrast/s_texfilter.c | 2 +- src/mesa/swrast/s_texrender.c | 1 - src/mesa/swrast/s_triangle.c | 1 - src/mesa/swrast/s_zoom.c | 1 - src/mesa/swrast_setup/ss_context.c | 2 +- src/mesa/swrast_setup/ss_triangle.c | 1 - 19 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/mesa/swrast/s_aatriangle.c b/src/mesa/swrast/s_aatriangle.c index 1d076cc..b510987 100644 --- a/src/mesa/swrast/s_aatriangle.c +++ b/src/mesa/swrast/s_aatriangle.c @@ -30,7 +30,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/state.h" diff --git a/src/mesa/swrast/s_alpha.c b/src/mesa/swrast/s_alpha.c index b1a7ff1..841642f 100644 --- a/src/mesa/swrast/s_alpha.c +++ b/src/mesa/swrast/s_alpha.c @@ -29,7 +29,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/macros.h" #include "s_alpha.h" diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index 0bf0377..9e029db 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -20,7 +20,6 @@ */ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/atifragshader.h" #include "main/samplerobj.h" diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index ecde292..af24207 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -27,7 +27,6 @@ #include "main/imports.h" #include "main/bufferobj.h" -#include "main/colormac.h" #include "main/mtypes.h" #include "main/samplerobj.h" #include "main/teximage.h" diff --git a/src/mesa/swrast/s_copypix.c b/src/mesa/swrast/s_copypix.c index 17140ad..68c83e4 100644 --- a/src/mesa/swrast/s_copypix.c +++ b/src/mesa/swrast/s_copypix.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/condrender.h" #include "main/macros.h" #include "main/pixeltransfer.h" diff 
--git a/src/mesa/swrast/s_feedback.c b/src/mesa/swrast/s_feedback.c index f25b897..71f48ce 100644 --- a/src/mesa/swrast/s_feedback.c +++ b/src/mesa/swrast/s_feedback.c @@ -23,7 +23,6 @@ */ #include "main/glheader.h" -#include "main/colormac.h" #include "main/feedback.h" #include "main/macros.h" diff --git a/src/mesa/swrast/s_fog.c b/src/mesa/swrast/s_fog.c index e270b7e..8b0bdf8 100644 --- a/src/mesa/swrast/s_fog.c +++ b/src/mesa/swrast/s_fog.c @@ -25,7 +25,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 12bcda3..175915a 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -23,7 +23,7 @@ */ #include "main/glheader.h" -#include "main/colormac.h" +#include "main/macros.h" #include "main/samplerobj.h" #include "main/teximage.h" #include "program/prog_instruction.h" diff --git a/src/mesa/swrast/s_lines.c b/src/mesa/swrast/s_lines.c index 3e626b9..58bd2fc 100644 --- a/src/mesa/swrast/s_lines.c +++ b/src/mesa/swrast/s_lines.c @@ -25,7 +25,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/macros.h" #include "s_aaline.h" #include "s_context.h" diff --git a/src/mesa/swrast/s_points.c b/src/mesa/swrast/s_points.c index 8180483..2212c95 100644 --- a/src/mesa/swrast/s_points.c +++ b/src/mesa/swrast/s_points.c @@ -24,7 +24,6 @@ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "s_context.h" #include "s_feedback.h" diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index e304b6b..0a30d10 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -33,7 +33,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/format_pack.h" #include "main/format_unpack.h" #include "main/macros.h" diff --git a/src/mesa/swrast/s_texcombine.c 
b/src/mesa/swrast/s_texcombine.c index 58ff164..0adb8e5 100644 --- a/src/mesa/swrast/s_texcombine.c +++ b/src/mesa/swrast/s_texcombine.c @@ -26,8 +26,8 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/imports.h" +#include "main/macros.h" #include "main/pixeltransfer.h" #include "main/samplerobj.h" #include "program/prog_instruction.h" diff --git a/src/mesa/swrast/s_texfetch.c b/src/mesa/swrast/s_texfetch.c index 3c4ee15..1fe21c0 100644 --- a/src/mesa/swrast/s_texfetch.c +++ b/src/mesa/swrast/s_texfetch.c @@ -33,7 +33,6 @@ */ -#include "main/colormac.h" #include "main/macros.h" #include "main/texcompress.h" #include "main/texcompress_fxt1.h" diff --git a/src/mesa/swrast/s_texfilter.c b/src/mesa/swrast/s_texfilter.c index 3ade995..abc1727 100644 --- a/src/mesa/swrast/s_texfilter.c +++ b/src/mesa/swrast/s_texfilter.c @@ -26,8 +26,8 @@ #include "c99_math.h" #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/imports.h" +#include "main/macros.h" #include "main/samplerobj.h" #include "main/teximage.h" #include "main/texobj.h" diff --git a/src/mesa/swrast/s_texrender.c b/src/mesa/swrast/s_texrender.c index 29bb270..fa853c9 100644 --- a/src/mesa/swrast/s_texrender.c +++ b/src/mesa/swrast/s_texrender.c @@ -1,6 +1,5 @@ #include "main/context.h" -#include "main/colormac.h" #include "main/fbobject.h" #include "main/macros.h" #include "main/teximage.h" diff --git a/src/mesa/swrast/s_triangle.c b/src/mesa/swrast/s_triangle.c index af039c3..876a74b 100644 --- a/src/mesa/swrast/s_triangle.c +++ b/src/mesa/swrast/s_triangle.c @@ -31,7 +31,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "main/imports.h" #include "main/macros.h" #include "main/mtypes.h" diff --git a/src/mesa/swrast/s_zoom.c b/src/mesa/swrast/s_zoom.c index ab22652..9879e2a 100644 --- a/src/mesa/swrast/s_zoom.c +++ b/src/mesa/swrast/s_zoom.c @@ -26,7 +26,6 @@ #include 
"main/macros.h" #include "main/imports.h" #include "main/format_pack.h" -#include "main/colormac.h" #include "s_context.h" #include "s_span.h" diff --git a/src/mesa/swrast_setup/ss_context.c b/src/mesa/swrast_setup/ss_context.c index 0b3b9e4..4fc90c3 100644 --- a/src/mesa/swrast_setup/ss_context.c +++ b/src/mesa/swrast_setup/ss_context.c @@ -27,7 +27,7 @@ #include "main/glheader.h" #include "main/imports.h" -#include "main/colormac.h" +#include "main/macros.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/swrast_setup/ss_triangle.c b/src/mesa/swrast_setup/ss_triangle.c index 483c415..b92c20b 100644 --- a/src/mesa/swrast_setup/ss_triangle.c +++ b/src/mesa/swrast_setup/ss_triangle.c @@ -27,7 +27,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/mtypes.h" From brianp at kemper.freedesktop.org Wed Apr 1 18:05:42 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 1 Apr 2015 11:05:42 -0700 (PDT) Subject: Mesa (master): tnl: remove unneeded #include of colormac.h Message-ID: <20150401180542.5D3E876332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f1d55017d7cb0e8fd9314ca29849304eb067960f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f1d55017d7cb0e8fd9314ca29849304eb067960f Author: Brian Paul Date: Wed Apr 1 09:45:38 2015 -0600 tnl: remove unneeded #include of colormac.h Acked-by: Matt Turner Reviewed-by: Mark Janes --- src/mesa/tnl/t_rasterpos.c | 1 - src/mesa/tnl/t_vb_fog.c | 1 - src/mesa/tnl/t_vb_light.c | 1 - src/mesa/tnl/t_vb_normals.c | 1 - src/mesa/tnl/t_vb_program.c | 1 - src/mesa/tnl/t_vb_texgen.c | 1 - src/mesa/tnl/t_vb_texmat.c | 1 - src/mesa/tnl/t_vb_vertex.c | 1 - src/mesa/tnl/t_vertex.c | 1 - src/mesa/tnl/t_vertex_generic.c | 2 +- src/mesa/tnl/t_vertex_sse.c | 1 - 11 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 
9ecf947..1cd3981 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -25,7 +25,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/feedback.h" #include "main/light.h" #include "main/macros.h" diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c index 3626f1d..1ca72f8 100644 --- a/src/mesa/tnl/t_vb_fog.c +++ b/src/mesa/tnl/t_vb_fog.c @@ -28,7 +28,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_light.c b/src/mesa/tnl/t_vb_light.c index 7781b6a..dbd57fa 100644 --- a/src/mesa/tnl/t_vb_light.c +++ b/src/mesa/tnl/t_vb_light.c @@ -25,7 +25,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/light.h" #include "main/macros.h" #include "main/imports.h" diff --git a/src/mesa/tnl/t_vb_normals.c b/src/mesa/tnl/t_vb_normals.c index b67789e..9aee1a2 100644 --- a/src/mesa/tnl/t_vb_normals.c +++ b/src/mesa/tnl/t_vb_normals.c @@ -27,7 +27,6 @@ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 464a4cd..1494349 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -32,7 +32,6 @@ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/samplerobj.h" diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c index 9a61ef2..94066f4 100644 --- a/src/mesa/tnl/t_vb_texgen.c +++ b/src/mesa/tnl/t_vb_texgen.c @@ -35,7 +35,6 @@ */ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_texmat.c b/src/mesa/tnl/t_vb_texmat.c index 1cc2c81..ef034d6 100644 --- 
a/src/mesa/tnl/t_vb_texmat.c +++ b/src/mesa/tnl/t_vb_texmat.c @@ -27,7 +27,6 @@ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vb_vertex.c b/src/mesa/tnl/t_vb_vertex.c index ea3a56c..b56d680 100644 --- a/src/mesa/tnl/t_vb_vertex.c +++ b/src/mesa/tnl/t_vb_vertex.c @@ -27,7 +27,6 @@ #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" diff --git a/src/mesa/tnl/t_vertex.c b/src/mesa/tnl/t_vertex.c index 369d6d9..c3294b0 100644 --- a/src/mesa/tnl/t_vertex.c +++ b/src/mesa/tnl/t_vertex.c @@ -28,7 +28,6 @@ #include #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "swrast/s_chan.h" #include "t_context.h" #include "t_vertex.h" diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index 079d473..2858922 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -28,7 +28,7 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" +#include "main/macros.h" #include "util/simple_list.h" #include "swrast/s_chan.h" #include "t_context.h" diff --git a/src/mesa/tnl/t_vertex_sse.c b/src/mesa/tnl/t_vertex_sse.c index 963432c..30dc1a7 100644 --- a/src/mesa/tnl/t_vertex_sse.c +++ b/src/mesa/tnl/t_vertex_sse.c @@ -29,7 +29,6 @@ #include "main/glheader.h" #include "main/context.h" -#include "main/colormac.h" #include "util/simple_list.h" #include "main/enums.h" #include "swrast/s_chan.h" From brianp at kemper.freedesktop.org Wed Apr 1 18:05:42 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 1 Apr 2015 11:05:42 -0700 (PDT) Subject: Mesa (master): mesa: don't include colormac.h in format code Message-ID: <20150401180542.79CE876332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1625d7a87ae66eb50653d9194e8f9e67b6957c05 URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1625d7a87ae66eb50653d9194e8f9e67b6957c05 Author: Brian Paul Date: Wed Apr 1 10:31:26 2015 -0600 mesa: don't include colormac.h in format code Acked-by: Matt Turner Reviewed-by: Mark Janes --- src/mesa/main/format_pack.py | 1 - src/mesa/main/format_unpack.h | 2 ++ src/mesa/main/format_unpack.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/format_pack.py b/src/mesa/main/format_pack.py index f141da8..2f43a30 100644 --- a/src/mesa/main/format_pack.py +++ b/src/mesa/main/format_pack.py @@ -43,7 +43,6 @@ string = """/* #include -#include "colormac.h" #include "format_pack.h" #include "format_utils.h" #include "macros.h" diff --git a/src/mesa/main/format_unpack.h b/src/mesa/main/format_unpack.h index eba3c66..964c607 100644 --- a/src/mesa/main/format_unpack.h +++ b/src/mesa/main/format_unpack.h @@ -25,6 +25,8 @@ #ifndef FORMAT_UNPACK_H #define FORMAT_UNPACK_H +#include "formats.h" + extern void _mesa_unpack_rgba_row(mesa_format format, GLuint n, const void *src, GLfloat dst[][4]); diff --git a/src/mesa/main/format_unpack.py b/src/mesa/main/format_unpack.py index 53bdf64..5928c20 100644 --- a/src/mesa/main/format_unpack.py +++ b/src/mesa/main/format_unpack.py @@ -43,7 +43,6 @@ string = """/* #include -#include "colormac.h" #include "format_unpack.h" #include "format_utils.h" #include "macros.h" From brianp at kemper.freedesktop.org Wed Apr 1 18:05:42 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 1 Apr 2015 11:05:42 -0700 (PDT) Subject: Mesa (master): mesa: remove unused macros from colormac.h Message-ID: <20150401180542.3F73476332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2ad8af1a0c319c83e4a8e00db3a9b9cb0ae029eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2ad8af1a0c319c83e4a8e00db3a9b9cb0ae029eb Author: Brian Paul Date: Wed Apr 1 09:45:12 2015 -0600 mesa: remove unused macros from colormac.h Acked-by: Matt Turner Reviewed-by: Mark Janes 
--- src/mesa/main/colormac.h | 45 --------------------------------------------- 1 file changed, 45 deletions(-) diff --git a/src/mesa/main/colormac.h b/src/mesa/main/colormac.h index bc69f46..33ca5af 100644 --- a/src/mesa/main/colormac.h +++ b/src/mesa/main/colormac.h @@ -52,70 +52,25 @@ _mesa_unclamped_float_rgba_to_ubyte(GLubyte dst[4], const GLfloat src[4]) /** * \name Generic color packing macros. All inputs should be GLubytes. - * - * \todo We may move these into texstore.h at some point. */ /*@{*/ #define PACK_COLOR_8888( X, Y, Z, W ) \ (((X) << 24) | ((Y) << 16) | ((Z) << 8) | (W)) -#define PACK_COLOR_8888_REV( X, Y, Z, W ) \ - (((W) << 24) | ((Z) << 16) | ((Y) << 8) | (X)) - -#define PACK_COLOR_888( X, Y, Z ) \ - (((X) << 16) | ((Y) << 8) | (Z)) - #define PACK_COLOR_565( X, Y, Z ) \ ((((X) & 0xf8) << 8) | (((Y) & 0xfc) << 3) | (((Z) & 0xf8) >> 3)) -#define PACK_COLOR_5551( R, G, B, A ) \ - ((((R) & 0xf8) << 8) | (((G) & 0xf8) << 3) | (((B) & 0xf8) >> 2) | \ - ((A) >> 7)) - #define PACK_COLOR_1555( A, B, G, R ) \ ((((B) & 0xf8) << 7) | (((G) & 0xf8) << 2) | (((R) & 0xf8) >> 3) | \ (((A) & 0x80) << 8)) -#define PACK_COLOR_1555_REV( A, B, G, R ) \ - ((((B) & 0xf8) >> 1) | (((G) & 0xc0) >> 6) | (((G) & 0x38) << 10) | (((R) & 0xf8) << 5) | \ - ((A) ? 
0x80 : 0)) - -#define PACK_COLOR_2101010_UB( A, B, G, R ) \ - (((B) << 22) | ((G) << 12) | ((R) << 2) | \ - (((A) & 0xc0) << 24)) - -#define PACK_COLOR_2101010_US( A, B, G, R ) \ - ((((B) >> 6) << 20) | (((G) >> 6) << 10) | ((R) >> 6) | \ - (((A) >> 14) << 30)) - #define PACK_COLOR_4444( R, G, B, A ) \ ((((R) & 0xf0) << 8) | (((G) & 0xf0) << 4) | ((B) & 0xf0) | ((A) >> 4)) -#define PACK_COLOR_4444_REV( R, G, B, A ) \ - ((((B) & 0xf0) << 8) | (((A) & 0xf0) << 4) | ((R) & 0xf0) | ((G) >> 4)) - -#define PACK_COLOR_44( L, A ) \ - (((L) & 0xf0) | (((A) & 0xf0) >> 4)) - #define PACK_COLOR_88( L, A ) \ (((L) << 8) | (A)) -#define PACK_COLOR_88_REV( L, A ) \ - (((A) << 8) | (L)) - -#define PACK_COLOR_1616( L, A ) \ - (((L) << 16) | (A)) - -#define PACK_COLOR_1616_REV( L, A ) \ - (((A) << 16) | (L)) - -#define PACK_COLOR_332( R, G, B ) \ - (((R) & 0xe0) | (((G) & 0xe0) >> 3) | (((B) & 0xc0) >> 6)) - -#define PACK_COLOR_233( B, G, R ) \ - (((B) & 0xc0) | (((G) & 0xe0) >> 2) | (((R) & 0xe0) >> 5)) - /*@}*/ From brianp at kemper.freedesktop.org Wed Apr 1 18:05:42 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 1 Apr 2015 11:05:42 -0700 (PDT) Subject: Mesa (master): mesa: remove unneeded #include of colormac.h Message-ID: <20150401180542.6D9A076332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2768a0b1b42f3c1531ab9c3647a93f0504002280 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2768a0b1b42f3c1531ab9c3647a93f0504002280 Author: Brian Paul Date: Wed Apr 1 09:45:57 2015 -0600 mesa: remove unneeded #include of colormac.h Acked-by: Matt Turner Reviewed-by: Mark Janes --- src/mesa/main/attrib.c | 1 - src/mesa/main/debug.c | 2 +- src/mesa/main/pixel.c | 1 - src/mesa/main/pixeltransfer.c | 2 +- src/mesa/main/samplerobj.h | 3 +++ src/mesa/main/texcompress.c | 1 - src/mesa/main/texcompress_fxt1.c | 1 - src/mesa/main/texcompress_rgtc.c | 1 - src/mesa/main/texcompress_s3tc.c | 1 - src/mesa/main/texparam.c | 1 - src/mesa/main/texstate.c | 2 -- 
src/mesa/main/texstore.c | 1 - src/mesa/program/prog_execute.c | 1 - 13 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index 20216a8..b163c0a 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -32,7 +32,6 @@ #include "buffers.h" #include "bufferobj.h" #include "clear.h" -#include "colormac.h" #include "context.h" #include "depth.h" #include "enable.h" diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c index b50d79e..c93e84a 100644 --- a/src/mesa/main/debug.c +++ b/src/mesa/main/debug.c @@ -26,11 +26,11 @@ #include #include "mtypes.h" #include "attrib.h" -#include "colormac.h" #include "enums.h" #include "formats.h" #include "hash.h" #include "imports.h" +#include "macros.h" #include "debug.h" #include "get.h" #include "pixelstore.h" diff --git a/src/mesa/main/pixel.c b/src/mesa/main/pixel.c index 7162c6f..ecda269 100644 --- a/src/mesa/main/pixel.c +++ b/src/mesa/main/pixel.c @@ -30,7 +30,6 @@ #include "glheader.h" #include "bufferobj.h" -#include "colormac.h" #include "context.h" #include "macros.h" #include "pixel.h" diff --git a/src/mesa/main/pixeltransfer.c b/src/mesa/main/pixeltransfer.c index 8bbeeb8..94464ea 100644 --- a/src/mesa/main/pixeltransfer.c +++ b/src/mesa/main/pixeltransfer.c @@ -31,7 +31,7 @@ #include "glheader.h" -#include "colormac.h" +#include "macros.h" #include "pixeltransfer.h" #include "imports.h" #include "mtypes.h" diff --git a/src/mesa/main/samplerobj.h b/src/mesa/main/samplerobj.h index 988b874..7bea911 100644 --- a/src/mesa/main/samplerobj.h +++ b/src/mesa/main/samplerobj.h @@ -32,6 +32,9 @@ extern "C" { #endif +#include "mtypes.h" + + struct dd_function_table; static inline struct gl_sampler_object * diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 00234d4..0fd1a36 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -32,7 +32,6 @@ #include "glheader.h" #include "imports.h" -#include "colormac.h" 
#include "context.h" #include "formats.h" #include "mtypes.h" diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c index 5623594..f06f048 100644 --- a/src/mesa/main/texcompress_fxt1.c +++ b/src/mesa/main/texcompress_fxt1.c @@ -31,7 +31,6 @@ #include "glheader.h" #include "imports.h" -#include "colormac.h" #include "image.h" #include "macros.h" #include "mipmap.h" diff --git a/src/mesa/main/texcompress_rgtc.c b/src/mesa/main/texcompress_rgtc.c index f40e4e6..66de1f1 100644 --- a/src/mesa/main/texcompress_rgtc.c +++ b/src/mesa/main/texcompress_rgtc.c @@ -35,7 +35,6 @@ #include "glheader.h" #include "imports.h" -#include "colormac.h" #include "image.h" #include "macros.h" #include "mipmap.h" diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c index 41d00d4..38ce5f8 100644 --- a/src/mesa/main/texcompress_s3tc.c +++ b/src/mesa/main/texcompress_s3tc.c @@ -35,7 +35,6 @@ #include "glheader.h" #include "imports.h" -#include "colormac.h" #include "dlopen.h" #include "image.h" #include "macros.h" diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 1ef7286..b5d42d3 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -32,7 +32,6 @@ #include #include "main/glheader.h" #include "main/blend.h" -#include "main/colormac.h" #include "main/context.h" #include "main/enums.h" #include "main/formats.h" diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c index 0a7f983..1af9d47 100644 --- a/src/mesa/main/texstate.c +++ b/src/mesa/main/texstate.c @@ -31,8 +31,6 @@ #include #include "glheader.h" #include "bufferobj.h" -#include "colormac.h" -#include "colortab.h" #include "context.h" #include "enums.h" #include "macros.h" diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 7ad9d2b..1525205 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -53,7 +53,6 @@ #include "glheader.h" #include "bufferobj.h" -#include "colormac.h" #include "format_pack.h" 
#include "format_utils.h" #include "image.h" diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index dc4919a..16e8e34 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -37,7 +37,6 @@ #include "c99_math.h" #include "main/glheader.h" -#include "main/colormac.h" #include "main/macros.h" #include "prog_execute.h" #include "prog_instruction.h" From jrfonseca at kemper.freedesktop.org Wed Apr 1 18:50:36 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Wed, 1 Apr 2015 11:50:36 -0700 (PDT) Subject: Mesa (master): automake: Fix out-of-source builds. Message-ID: <20150401185036.0E8F976332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7f0682cebf8b100922d45a423df1e2fbd036bc3e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f0682cebf8b100922d45a423df1e2fbd036bc3e Author: Jose Fonseca Date: Wed Apr 1 19:48:09 2015 +0100 automake: Fix out-of-source builds. Add include path for generated nir_opcodes.h. Trivial. --- src/gallium/auxiliary/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am index 27a8b3f..36b84cf 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -10,6 +10,7 @@ include $(top_srcdir)/src/gallium/Automake.inc noinst_LTLIBRARIES = libgallium.la AM_CFLAGS = \ + -I$(top_builddir)/src/glsl/nir \ -I$(top_srcdir)/src/gallium/auxiliary/util \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ From jrfonseca at kemper.freedesktop.org Wed Apr 1 18:50:36 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Wed, 1 Apr 2015 11:50:36 -0700 (PDT) Subject: Mesa (master): automake,scons: Put NIR source files in a separate var to fix SCons build. 
Message-ID: <20150401185036.17BF87635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3321724c10e2c9c5cd8f84bafb26678c6d1108b5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3321724c10e2c9c5cd8f84bafb26678c6d1108b5 Author: Jose Fonseca Date: Wed Apr 1 19:49:09 2015 +0100 automake,scons: Put NIR source files in a separate var to fix SCons build. SCons does not build NIR yet. Trivial. --- src/gallium/auxiliary/Makefile.am | 1 + src/gallium/auxiliary/Makefile.sources | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am index 36b84cf..89c7a13 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -22,6 +22,7 @@ AM_CXXFLAGS = \ libgallium_la_SOURCES = \ $(C_SOURCES) \ + $(NIR_SOURCES) \ $(GENERATED_SOURCES) if HAVE_MESA_LLVM diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 08e4e4c..bd8e949 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -69,7 +69,6 @@ C_SOURCES := \ indices/u_indices_priv.h \ indices/u_primconvert.c \ indices/u_primconvert.h \ - nir/tgsi_to_nir.c \ os/os_memory_aligned.h \ os/os_memory_debug.h \ os/os_memory_stdc.h \ @@ -303,6 +302,9 @@ C_SOURCES := \ util/u_vbuf.h \ util/u_video.h +NIR_SOURCES := \ + nir/tgsi_to_nir.c + VL_SOURCES := \ vl/vl_compositor.c \ vl/vl_compositor.h \ From ldeks at kemper.freedesktop.org Wed Apr 1 19:07:53 2015 From: ldeks at kemper.freedesktop.org (Laura Ekstrand) Date: Wed, 1 Apr 2015 12:07:53 -0700 (PDT) Subject: Mesa (master): main: create_buffers unlocks mutex when throwing OUT_OF_MEMORY. 
Message-ID: <20150401190753.4206176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 142909f19ddeae7237acbb98b47e9228533e58f0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=142909f19ddeae7237acbb98b47e9228533e58f0 Author: Laura Ekstrand Date: Wed Apr 1 11:18:32 2015 -0700 main: create_buffers unlocks mutex when throwing OUT_OF_MEMORY. Ilia Mirkin found that I had forgotten to free the mutex in the error case. Reviewed-by: Ilia Mirkin --- src/mesa/main/bufferobj.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index c45cec4..66dee68 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -1333,6 +1333,7 @@ create_buffers(GLsizei n, GLuint *buffers, bool dsa) buf = ctx->Driver.NewBufferObject(ctx, buffers[i]); if (!buf) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); + mtx_unlock(&ctx->Shared->Mutex); return; } } From mattst88 at kemper.freedesktop.org Wed Apr 1 19:25:43 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 12:25:43 -0700 (PDT) Subject: Mesa (master): glsl: Make sure not to dereference NULL. Message-ID: <20150401192543.92E1576332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3384179faab0456f93f4a5c62ce05305ff20fe5c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3384179faab0456f93f4a5c62ce05305ff20fe5c Author: Matt Turner Date: Wed Apr 1 10:24:26 2015 -0700 glsl: Make sure not to dereference NULL. Found by Coverity. 
--- src/glsl/opt_algebraic.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index a940d2f..3d2f2ca 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -574,6 +574,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) continue; ir_expression *add_expr = floor_expr->operands[0]->as_expression(); + if (!add_expr) + continue; for (int j = 0; j < 2; j++) { ir_expression *abs_expr = add_expr->operands[j]->as_expression(); From evelikov at kemper.freedesktop.org Wed Apr 1 19:39:31 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 12:39:31 -0700 (PDT) Subject: Mesa (master): configure.ac: error out if python/mako is not found when required Message-ID: <20150401193931.BDCF376332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4008975e6f4b2e15fceed4f07d8ec763dd0949d1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4008975e6f4b2e15fceed4f07d8ec763dd0949d1 Author: Emil Velikov Date: Mon Mar 23 17:49:24 2015 +0000 configure.ac: error out if python/mako is not found when required In case of using a distribution tarball (or a dirty git tree) one can have the generated sources locally. Make configure.ac error out otherwise, to alert about the unmet requirement(s) of python/mako. v2: Check only for a single file for each dependency. Suggested-by: Matt Turner Signed-off-by: Emil Velikov Reviewed-by: Matt Turner --- configure.ac | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 3e5b6f5..3d8e0c6 100644 --- a/configure.ac +++ b/configure.ac @@ -123,8 +123,17 @@ if test "x$INDENT" != "xcat"; then fi AX_CHECK_PYTHON_MAKO_MODULE($PYTHON_MAKO_REQUIRED) -if test -n "$PYTHON2" -a "x$acv_mako_found" != "xyes"; then - AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found]) + +if test -z "$PYTHON2"; then + if test !
-f "$srcdir/src/util/format_srgb.c"; then + AC_MSG_ERROR([Python not found - unable to generate sources]) + fi +else + if test "x$acv_mako_found" = xno; then + if test ! -f "$srcdir/src/mesa/main/format_unpack.c"; then + AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found]) + fi + fi fi AC_PROG_INSTALL From evelikov at kemper.freedesktop.org Wed Apr 1 19:39:31 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 12:39:31 -0700 (PDT) Subject: Mesa (master): gallium: ship tgsi_to_nir.h in the tarball Message-ID: <20150401193931.C426F7635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bd4925c6ac468e80f6106f3d684119498b606a0d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd4925c6ac468e80f6106f3d684119498b606a0d Author: Emil Velikov Date: Wed Apr 1 19:09:00 2015 +0000 gallium: ship tgsi_to_nir.h in the tarball Acked-by: Matt Turner Signed-off-by: Emil Velikov --- src/gallium/auxiliary/Makefile.sources | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index bd8e949..ec7547c 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -303,7 +303,8 @@ C_SOURCES := \ util/u_video.h NIR_SOURCES := \ - nir/tgsi_to_nir.c + nir/tgsi_to_nir.c \ + nir/tgsi_to_nir.h VL_SOURCES := \ vl/vl_compositor.c \ From evelikov at kemper.freedesktop.org Wed Apr 1 19:44:13 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 1 Apr 2015 12:44:13 -0700 (PDT) Subject: Mesa (master): configure: nuke --with-max-{width,height} Message-ID: <20150401194413.BF14376332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d99135b2e9b7599ee57ac90952c605b725239908 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d99135b2e9b7599ee57ac90952c605b725239908 Author: Emil Velikov Date: Wed Apr 1 15:51:59 2015 +0100 configure: nuke --with-max-{width,height} Unused as of 
commit 630ab0d27ba(mesa: remove last of MAX_WIDTH, MAX_HEIGHT). Update all the remaining references to the defines. v2: Use the correct variable name in the comments Signed-off-by: Emil Velikov Reviewed-by: Brian Paul --- configure.ac | 15 --------------- include/GL/osmesa.h | 6 ++---- src/gallium/state_trackers/glx/xlib/glx_api.c | 2 +- src/mesa/drivers/x11/fakeglx.c | 2 +- 4 files changed, 4 insertions(+), 21 deletions(-) diff --git a/configure.ac b/configure.ac index 3d8e0c6..4ed4b74 100644 --- a/configure.ac +++ b/configure.ac @@ -1832,21 +1832,6 @@ if ! echo "$egl_platforms" | grep -q 'x11'; then GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS" fi -AC_ARG_WITH([max-width], - [AS_HELP_STRING([--with-max-width=N], - [Maximum framebuffer width (4096)])], - [DEFINES="${DEFINES} -DMAX_WIDTH=${withval}"; - AS_IF([test "${withval}" -gt "4096"], - [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] -) -AC_ARG_WITH([max-height], - [AS_HELP_STRING([--with-max-height=N], - [Maximum framebuffer height (4096)])], - [DEFINES="${DEFINES} -DMAX_HEIGHT=${withval}"; - AS_IF([test "${withval}" -gt "4096"], - [AC_MSG_WARN([Large framebuffer: see s_tritemp.h comments.])])] -) - dnl dnl Gallium LLVM dnl diff --git a/include/GL/osmesa.h b/include/GL/osmesa.h index 16ee89a..ca0d167 100644 --- a/include/GL/osmesa.h +++ b/include/GL/osmesa.h @@ -41,10 +41,8 @@ * OSMesaGetIntegerv - return OSMesa state parameters * * - * The limits on the width and height of an image buffer are MAX_WIDTH and - * MAX_HEIGHT as defined in Mesa/src/config.h. Defaults are 1280 and 1024. - * You can increase them as needed but beware that many temporary arrays in - * Mesa are dimensioned by MAX_WIDTH or MAX_HEIGHT. + * The limits on the width and height of an image buffer can be retrieved + * via OSMesaGetIntegerv(OSMESA_MAX_WIDTH/OSMESA_MAX_HEIGHT). 
*/ diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index f9572b7..0508255 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -1636,7 +1636,7 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig ) case GLX_MAX_PBUFFER_WIDTH: if (!fbconfig) return GLX_BAD_ATTRIBUTE; - /* XXX or MAX_WIDTH? */ + /* XXX should be same as ctx->Const.MaxRenderbufferSize */ *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen); break; case GLX_MAX_PBUFFER_HEIGHT: diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 4fd6d75..9286f71 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1720,7 +1720,7 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig ) case GLX_MAX_PBUFFER_WIDTH: if (!fbconfig) return GLX_BAD_ATTRIBUTE; - /* XXX or MAX_WIDTH? */ + /* XXX should be same as ctx->Const.MaxRenderbufferSize */ *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen); break; case GLX_MAX_PBUFFER_HEIGHT: From jekstrand at kemper.freedesktop.org Wed Apr 1 19:50:52 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 12:50:52 -0700 (PDT) Subject: Mesa (master): nir/print: Don't print extra swizzle components Message-ID: <20150401195052.3414176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 793a94d6b5fc589ca8d7475347def4e222cd3d7c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=793a94d6b5fc589ca8d7475347def4e222cd3d7c Author: Jason Ekstrand Date: Mon Mar 23 18:20:21 2015 -0700 nir/print: Don't print extra swizzle components Previously, NIR would just print 4 swizzle components if the swizzle was anything other than foo.xyzw. This creates lots of noise if, for example, you have a one-component element with a swizzle of foo.xxxx.
Reviewed-by: Kenneth Grunke --- src/glsl/nir/nir_print.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index fa11a31..53fada8 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -137,7 +137,7 @@ print_dest(nir_dest *dest, FILE *fp) } static void -print_alu_src(nir_alu_src *src, FILE *fp) +print_alu_src(nir_alu_src *src, uint8_t read_mask, FILE *fp) { if (src->negate) fprintf(fp, "-"); @@ -146,13 +146,25 @@ print_alu_src(nir_alu_src *src, FILE *fp) print_src(&src->src, fp); - if (src->swizzle[0] != 0 || - src->swizzle[1] != 1 || - src->swizzle[2] != 2 || - src->swizzle[3] != 3) { + bool print_swizzle = false; + for (unsigned i = 0; i < 4; i++) { + if (read_mask >> i == 0) + break; + + if (src->swizzle[i] != i) { + print_swizzle = true; + break; + } + } + + if (print_swizzle) { fprintf(fp, "."); - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < 4; i++) { + if (read_mask >> i == 0) + break; + fprintf(fp, "%c", "xyzw"[src->swizzle[i]]); + } } if (src->abs) @@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp) if (i != 0) fprintf(fp, ", "); - print_alu_src(&instr->src[i], fp); + print_alu_src(&instr->src[i], instr->dest.write_mask, fp); } } From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir/algebraic: Remove a duplicate optimization Message-ID: <20150401202826.8EAF17635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1779dc060fa8d55d979f887e41f1ec2c793859b7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1779dc060fa8d55d979f887e41f1ec2c793859b7 Author: Jason Ekstrand Date: Wed Apr 1 12:24:37 2015 -0700 nir/algebraic: Remove a duplicate optimization This optimization is repeated verbatim above Reviewed-by: Matt Turner --- src/glsl/nir/nir_opt_algebraic.py | 3 --- 1 
file changed, 3 deletions(-) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 190bd91..ddf78be 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -172,9 +172,6 @@ optimizations = [ (('iadd', a, ('isub', 0, b)), ('isub', a, b)), (('fabs', ('fsub', 0.0, a)), ('fabs', a)), (('iabs', ('isub', 0, a)), ('iabs', a)), - -# This one may not be exact - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), ] # Add optimizations to handle the case where the result of a ternary is From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir/algebraic: #define around structure definitions Message-ID: <20150401202826.8413676332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 22ee7eeb4e08ffdd8d53aa68e4d2b2c09f1721d0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=22ee7eeb4e08ffdd8d53aa68e4d2b2c09f1721d0 Author: Jason Ekstrand Date: Mon Mar 23 17:22:44 2015 -0700 nir/algebraic: #define around structure definitions Previously, we couldn't generate two algebraic passes in the same file because of multiple structure definitions. To solve this, we play the age-old header file trick and just #define around it. 
Reviewed-by: Matt Turner --- src/glsl/nir/nir_algebraic.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/glsl/nir/nir_algebraic.py b/src/glsl/nir/nir_algebraic.py index afab1a0..bbf4f08 100644 --- a/src/glsl/nir/nir_algebraic.py +++ b/src/glsl/nir/nir_algebraic.py @@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template(""" #include "nir.h" #include "nir_search.h" +#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS +#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS + struct transform { const nir_search_expression *search; const nir_search_value *replace; unsigned condition_offset; }; +struct opt_state { + void *mem_ctx; + bool progress; + const bool *condition_flags; +}; + +#endif + % for (opcode, xform_list) in xform_dict.iteritems(): % for xform in xform_list: ${xform.search.render()} @@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = { }; % endfor -struct opt_state { - void *mem_ctx; - bool progress; - const bool *condition_flags; -}; - static bool ${pass_name}_block(nir_block *block, void *void_state) { From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir: Move the compare-with-zero optimizations to the late section Message-ID: <20150401202826.A89B376332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e06a3d0282ef018ae4a28b091a632c6a2e8c7c52 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e06a3d0282ef018ae4a28b091a632c6a2e8c7c52 Author: Jason Ekstrand Date: Mon Mar 23 17:36:22 2015 -0700 nir: Move the compare-with-zero optimizations to the late section total instructions in shared programs: 4422307 -> 4422363 (0.00%) instructions in affected programs: 4230 -> 4286 (1.32%) helped: 0 HURT: 12 While this does hurt some things, the losses are minor and it prevents the compare-with-zero optimization from fighting with ffma which is much more 
important. Reviewed-by: Matt Turner --- src/glsl/nir/nir_opt_algebraic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 2079235..39f37bb 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -83,10 +83,6 @@ optimizations = [ (('inot', ('fge', a, b)), ('flt', a, b)), (('inot', ('ilt', a, b)), ('ige', a, b)), (('inot', ('ige', a, b)), ('ilt', a, b)), - (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), - (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), - (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), @@ -200,6 +196,10 @@ for op in ['flt', 'fge', 'feq', 'fne', # they help code generation but do not necessarily produce code that is # more easily optimizable. 
late_optimizations = [ + (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), + (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), + (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), + (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), ] print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir/algebraic: Add a separate section for "late" optimizations Message-ID: <20150401202826.9D2E37635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: da294f9b2f666f487001b2a25627c867c40eb3d9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=da294f9b2f666f487001b2a25627c867c40eb3d9 Author: Jason Ekstrand Date: Mon Mar 23 17:11:49 2015 -0700 nir/algebraic: Add a separate section for "late" optimizations i965/nir: Use the late optimizations Reviewed-by: Matt Turner --- src/glsl/nir/nir.h | 1 + src/glsl/nir/nir_opt_algebraic.py | 9 +++++++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +++ 3 files changed, 13 insertions(+) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 7b886e3..24deb82 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1613,6 +1613,7 @@ void nir_convert_to_ssa(nir_shader *shader); void nir_convert_from_ssa(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); +bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_global_to_local(nir_shader *shader); diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index ddf78be..2079235 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -195,4 +195,13 @@ for op in ['flt', 'fge', 'feq', 'fne', ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ] +# This section contains "late" optimizations that should be run after the +#
regular optimizations have finished. Optimizations should go here if +# they help code generation but do not necessarily produce code that is +# more easily optimizable. +late_optimizations = [ +] + print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() +print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", + late_optimizations).render() diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 21e52fe..204e713 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -147,6 +147,9 @@ fs_visitor::emit_nir_code() nir_optimize(nir); + nir_opt_algebraic_late(nir); + nir_validate_shader(nir); + nir_lower_locals_to_regs(nir); nir_validate_shader(nir); From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir: Add a dedicated ffma peephole optimization Message-ID: <20150401202826.B7AC176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a8c8b3b8720bb7ce8ac1cb94815ed36d8c881f66 Author: Jason Ekstrand Date: Sat Mar 21 12:21:21 2015 -0700 nir: Add a dedicated ffma peephole optimization i965/nir: Use the dedicated ffma peephole total instructions in shared programs: 4418748 -> 4394618 (-0.55%) instructions in affected programs: 1292790 -> 1268660 (-1.87%) helped: 5999 HURT: 457 GAINED: 4 LOST: 9 Reviewed-by: Matt Turner --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir_opt_peephole_ffma.c | 220 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 + 3 files changed, 223 insertions(+) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index fa5d991..b56fa26 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -48,6 +48,7 @@ NIR_FILES = \ nir/nir_opt_dce.c 
\ nir/nir_opt_gcm.c \ nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_ffma.c \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ nir/nir_print.c \ diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c new file mode 100644 index 0000000..1ba4ac2 --- /dev/null +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -0,0 +1,220 @@ +/* + * Copyright ? 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason at jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a small peephole optimization that looks for a multiply that + * is only ever used in an add and replaces both with an fma. 
+ */ + +struct peephole_ffma_state { + void *mem_ctx; + nir_function_impl *impl; + bool progress; +}; + +static nir_alu_instr * +get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) +{ + assert(src->src.is_ssa && !src->abs && !src->negate); + + nir_instr *instr = src->src.ssa->parent_instr; + if (instr->type != nir_instr_type_alu) + return NULL; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_imov: + case nir_op_fmov: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + break; + + case nir_op_fneg: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = !*negate; + break; + + case nir_op_fabs: + alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs); + *negate = false; + *abs = true; + break; + + case nir_op_fmul: + break; + + default: + return NULL; + } + + if (!alu) + return NULL; + + for (unsigned i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & (1 << i))) + break; + + swizzle[i] = swizzle[src->swizzle[i]]; + } + + return alu; +} + +static bool +nir_opt_peephole_ffma_block(nir_block *block, void *void_state) +{ + struct peephole_ffma_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *add = nir_instr_as_alu(instr); + if (add->op != nir_op_fadd) + continue; + + /* TODO: Maybe bail if this expression is considered "precise"? */ + + assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa); + + /* This, is the case a + a. We would rather handle this with an + * algebraic reduction than fuse it. Also, we want to only fuse + * things where the multiply is used only once and, in this case, + * it would be used twice by the same instruction. 
+ */ + if (add->src[0].src.ssa == add->src[1].src.ssa) + continue; + + nir_alu_instr *mul; + uint8_t add_mul_src, swizzle[4]; + bool negate, abs; + for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { + for (unsigned i = 0; i < 4; i++) + swizzle[i] = i; + + negate = false; + abs = false; + + mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs); + + if (mul != NULL) + break; + } + + if (mul == NULL) + continue; + + nir_ssa_def *mul_src[2]; + mul_src[0] = mul->src[0].src.ssa; + mul_src[1] = mul->src[1].src.ssa; + + if (abs) { + for (unsigned i = 0; i < 2; i++) { + nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx, + nir_op_fabs); + abs->src[0].src = nir_src_for_ssa(mul_src[i]); + nir_ssa_dest_init(&abs->instr, &abs->dest.dest, + mul_src[i]->num_components, NULL); + abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1; + nir_instr_insert_before(&add->instr, &abs->instr); + mul_src[i] = &abs->dest.dest.ssa; + } + } + + if (negate) { + nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx, + nir_op_fneg); + neg->src[0].src = nir_src_for_ssa(mul_src[0]); + nir_ssa_dest_init(&neg->instr, &neg->dest.dest, + mul_src[0]->num_components, NULL); + neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1; + nir_instr_insert_before(&add->instr, &neg->instr); + mul_src[0] = &neg->dest.dest.ssa; + } + + nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma); + ffma->dest.saturate = add->dest.saturate; + ffma->dest.write_mask = add->dest.write_mask; + + for (unsigned i = 0; i < 2; i++) { + ffma->src[i].src = nir_src_for_ssa(mul_src[i]); + for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++) + ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; + } + nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], + state->mem_ctx); + + assert(add->dest.dest.is_ssa); + + nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest, + add->dest.dest.ssa.num_components, + add->dest.dest.ssa.name); + 
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa, + nir_src_for_ssa(&ffma->dest.dest.ssa), + state->mem_ctx); + + nir_instr_insert_before(&add->instr, &ffma->instr); + assert(add->dest.dest.ssa.uses->entries == 0); + nir_instr_remove(&add->instr); + + state->progress = true; + } + + return true; +} + +static bool +nir_opt_peephole_ffma_impl(nir_function_impl *impl) +{ + struct peephole_ffma_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.progress = false; + + nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_peephole_ffma(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= nir_opt_peephole_ffma_impl(overload->impl); + } + + return progress; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 204e713..4f4b746 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -52,6 +52,8 @@ nir_optimize(nir_shader *nir) nir_validate_shader(nir); progress |= nir_opt_algebraic(nir); nir_validate_shader(nir); + progress |= nir_opt_peephole_ffma(nir); + nir_validate_shader(nir); progress |= nir_opt_constant_folding(nir); nir_validate_shader(nir); progress |= nir_opt_remove_phis(nir); From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): nir/peephole_ffma: Be less agressive about fusing multiply-adds Message-ID: <20150401202826.C3C5E76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7f344721b1a94a6166b53f959ff6b159af3b5f9a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f344721b1a94a6166b53f959ff6b159af3b5f9a Author: Jason Ekstrand Date: Mon Mar 23 14:55:20 
2015 -0700 nir/peephole_ffma: Be less agressive about fusing multiply-adds shader-db results for fragment shaders on Haswell: total instructions in shared programs: 4395688 -> 4389623 (-0.14%) instructions in affected programs: 355876 -> 349811 (-1.70%) helped: 1455 HURT: 14 GAINED: 5 LOST: 0 Reviewed-by: Matt Turner --- src/glsl/nir/nir_opt_peephole_ffma.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c index 1ba4ac2..bf4dbe1 100644 --- a/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -38,6 +38,41 @@ struct peephole_ffma_state { bool progress; }; +static inline bool +are_all_uses_fadd(nir_ssa_def *def) +{ + if (def->if_uses->entries > 0) + return false; + + struct set_entry *use_iter; + set_foreach(def->uses, use_iter) { + nir_instr *use_instr = (nir_instr *)use_iter->key; + + if (use_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *use_alu = nir_instr_as_alu(use_instr); + switch (use_alu->op) { + case nir_op_fadd: + break; /* This one's ok */ + + case nir_op_imov: + case nir_op_fmov: + case nir_op_fneg: + case nir_op_fabs: + assert(use_alu->dest.dest.is_ssa); + if (!are_all_uses_fadd(&use_alu->dest.dest.ssa)) + return false; + break; + + default: + return false; + } + } + + return true; +} + static nir_alu_instr * get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) { @@ -66,6 +101,12 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) break; case nir_op_fmul: + /* Only absorbe a fmul into a ffma if the fmul is is only used in fadd + * operations. This prevents us from being too agressive with our + * fusing which can actually lead to more instructions. 
+ */ + if (!are_all_uses_fadd(&alu->dest.dest.ssa)) + return NULL; break; default: From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): i965/nir: Run the ffma peephole after the rest of the optimizations Message-ID: <20150401202826.D1D6776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 37703040a142da6bc7c458479a70e35118e10e6b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=37703040a142da6bc7c458479a70e35118e10e6b Author: Jason Ekstrand Date: Mon Mar 23 15:08:31 2015 -0700 i965/nir: Run the ffma peephole after the rest of the optimizations The idea here is that fusing multiply-add combinations too early can reduce our ability to perform CSE and value-numbering. Instead, we split ffma opcodes up-front, hope CSE cleans up, and then fuse after-the-fact. Unless an algebraic pass does something silly where it inserts something between the multiply and the add, splitting and re-fusing should never cause a problem. We run the late algebraic optimizations after this so that things like compare-with-zero don't hurt our ability to fuse things. shader-db results for fragment shaders on Haswell: total instructions in shared programs: 4390538 -> 4379236 (-0.26%) instructions in affected programs: 989359 -> 978057 (-1.14%) helped: 5308 HURT: 97 GAINED: 78 LOST: 5 This does, unfortunately, cause some substantial hurt to a shader in Kerbal Space Program. However, the damage is caused by changing a single instruction from a ffma to an add. This, in turn, *decreases* register pressure in one part of the program causing it to fail to register allocate and spill. Given the overwhelmingly positive results in other shaders and the fact that the NIR for the Kerbal shaders is actually better, this should be considered a positive. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 5 +++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index ed6fdff..21c8bd3 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -558,6 +558,11 @@ brw_initialize_context_constants(struct brw_context *brw) static const nir_shader_compiler_options gen6_nir_options = { .native_integers = true, + /* In order to help allow for better CSE at the NIR level we tell NIR + * to split all ffma instructions during opt_algebraic and we then + * re-combine them as a later step. + */ + .lower_ffma = true, }; /* We want the GLSL compiler to emit code that uses condition codes */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4f4b746..94641cf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -52,8 +52,6 @@ nir_optimize(nir_shader *nir) nir_validate_shader(nir); progress |= nir_opt_algebraic(nir); nir_validate_shader(nir); - progress |= nir_opt_peephole_ffma(nir); - nir_validate_shader(nir); progress |= nir_opt_constant_folding(nir); nir_validate_shader(nir); progress |= nir_opt_remove_phis(nir); @@ -149,6 +147,12 @@ fs_visitor::emit_nir_code() nir_optimize(nir); + if (brw->gen >= 6) { + /* Try and fuse multiply-adds */ + nir_opt_peephole_ffma(nir); + nir_validate_shader(nir); + } + nir_opt_algebraic_late(nir); nir_validate_shader(nir); From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): i965/nir: Run DCE again before going out of SSA Message-ID: <20150401202826.E150B76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 
b9d7454571029ab330f28164fe6869f5e455ca90 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b9d7454571029ab330f28164fe6869f5e455ca90 Author: Jason Ekstrand Date: Wed Apr 1 12:37:43 2015 -0700 i965/nir: Run DCE again before going out of SSA We run lowering and optimization passes that might leave garbage lying around. This keeps the FS cse from having to clean it up. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 94641cf..4dfb4d6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -163,6 +163,8 @@ fs_visitor::emit_nir_code() nir_validate_shader(nir); nir_copy_prop(nir); nir_validate_shader(nir); + nir_opt_dce(nir); + nir_validate_shader(nir); if (unlikely(debug_enabled)) { fprintf(stderr, "NIR (SSA form) for %s shader:\n", stage_name); From jekstrand at kemper.freedesktop.org Wed Apr 1 20:28:26 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 1 Apr 2015 13:28:26 -0700 (PDT) Subject: Mesa (master): i965: Use the same nir options for all gens Message-ID: <20150401202826.EEF9C76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 218e45e2f7b2d6c20bbba837f6e5cbe15610771e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=218e45e2f7b2d6c20bbba837f6e5cbe15610771e Author: Jason Ekstrand Date: Mon Mar 23 15:58:34 2015 -0700 i965: Use the same nir options for all gens If we tell NIR to split ffma's, then we don't need seperate options anymore. 
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 21c8bd3..84818f0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -551,12 +551,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128; } - static const nir_shader_compiler_options gen4_nir_options = { - .native_integers = true, - .lower_ffma = true, - }; - - static const nir_shader_compiler_options gen6_nir_options = { + static const nir_shader_compiler_options nir_options = { .native_integers = true, /* In order to help allow for better CSE at the NIR level we tell NIR * to split all ffma instructions during opt_algebraic and we then @@ -578,10 +573,7 @@ brw_initialize_context_constants(struct brw_context *brw) (i == MESA_SHADER_FRAGMENT); ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; - if (brw->gen >= 6) - ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen6_nir_options; - else - ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen4_nir_options; + ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options; } ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; From kwg at kemper.freedesktop.org Wed Apr 1 20:35:22 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Wed, 1 Apr 2015 13:35:22 -0700 (PDT) Subject: Mesa (master): mesa: Implement _mesa_flsll(). Message-ID: <20150401203522.9C7017635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3d166b313db14523c2e618e0ebf22b83c86d6334 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d166b313db14523c2e618e0ebf22b83c86d6334 Author: Kenneth Graunke Date: Mon Mar 30 16:08:26 2015 -0700 mesa: Implement _mesa_flsll(). 
This is _mesa_fls() for 64-bit values. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/main/imports.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 29f2499..c4d917e 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -433,6 +433,30 @@ _mesa_fls(unsigned int n) #endif } +/** + * Find the last (most significant) bit set in a uint64_t value. + * + * Essentially ffsll() in the reverse direction. + */ +static inline unsigned int +_mesa_flsll(uint64_t n) +{ +#ifdef HAVE___BUILTIN_CLZLL + return n == 0 ? 0 : 64 - __builtin_clzll(n); +#else + unsigned int v = 1; + + if (n == 0) + return 0; + + while (n >>= 1) + v++; + + return v; +#endif +} + + extern GLhalfARB _mesa_float_to_half(float f); From kwg at kemper.freedesktop.org Wed Apr 1 20:35:22 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Wed, 1 Apr 2015 13:35:22 -0700 (PDT) Subject: Mesa (master): nir: Use _mesa_flsll(InputsRead) in prog->nir. Message-ID: <20150401203522.A5B2976332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 25e214db0094306835a03225e1a37164c7c98bf7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=25e214db0094306835a03225e1a37164c7c98bf7 Author: Kenneth Graunke Date: Mon Mar 30 16:09:51 2015 -0700 nir: Use _mesa_flsll(InputsRead) in prog->nir. InputsRead is a 64-bit bitfield. Using _mesa_fls would silently truncate off the high bits, claiming inputs 32..56 (VARYING_SLOT_MAX) were never read. Using <= here was a hack I threw in at the last minute to fix programs which happened to use input slot 32. Switch back to using < now that the underlying problem is fixed. Fixes crashes in "Euro Truck Simulator 2" when using prog->nir, which uses input slot 33. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/program/prog_to_nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 4e210d1..5f00a8b 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -931,8 +931,8 @@ setup_registers_and_variables(struct ptn_compile *c) struct nir_shader *shader = b->shader; /* Create input variables. */ - const int last_input = _mesa_fls(c->prog->InputsRead); - for (int i = 0; i <= last_input; i++) { + const int num_inputs = _mesa_flsll(c->prog->InputsRead); + for (int i = 0; i < num_inputs; i++) { if (!(c->prog->InputsRead & BITFIELD64_BIT(i))) continue; nir_variable *var = rzalloc(shader, nir_variable); From kwg at kemper.freedesktop.org Wed Apr 1 20:35:22 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Wed, 1 Apr 2015 13:35:22 -0700 (PDT) Subject: Mesa (master): nir: In prog->nir, don't wrap dot products with ptn_channel (..., X). Message-ID: <20150401203522.9470B76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4b38c5c783e8858f362bb1ff6cf651875bd0bbc5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b38c5c783e8858f362bb1ff6cf651875bd0bbc5 Author: Kenneth Graunke Date: Mon Mar 30 05:17:56 2015 -0700 nir: In prog->nir, don't wrap dot products with ptn_channel(..., X). ptn_move_dest and nir_fadd already take care of replicating the last channel out, so we can just use a scalar and skip splatting it. 
Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/program/prog_to_nir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index d0e6110..4e210d1 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -499,25 +499,25 @@ ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) static void ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { - ptn_move_dest(b, dest, ptn_channel(b, nir_fdot2(b, src[0], src[1]), X)); + ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1])); } static void ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { - ptn_move_dest(b, dest, ptn_channel(b, nir_fdot3(b, src[0], src[1]), X)); + ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1])); } static void ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { - ptn_move_dest(b, dest, ptn_channel(b, nir_fdot4(b, src[0], src[1]), X)); + ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1])); } static void ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src) { - nir_ssa_def *dp3 = ptn_channel(b, nir_fdot3(b, src[0], src[1]), X); + nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]); ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W))); } From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:47 -0700 (PDT) Subject: Mesa (master): nir: Add identities for the log function. Message-ID: <20150401205347.E46B376332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 099c729b4cb6863e8ddbdb9afe7fd7bd53c11ee1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=099c729b4cb6863e8ddbdb9afe7fd7bd53c11ee1 Author: Matt Turner Date: Thu Mar 26 10:07:58 2015 -0700 nir: Add identities for the log function. The rcp(log(x)) pattern affects instruction counts. 
instructions in affected programs: 144 -> 138 (-4.17%) helped: 6 Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index db2fe03..185c291 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -145,6 +145,14 @@ optimizations = [ (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), + (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))), + (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('flog', ('frcp', a)), ('fneg', ('flog', a))), + (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), + (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:47 -0700 (PDT) Subject: Mesa (master): nir: Recognize open coded lrp. Message-ID: <20150401205347.C435776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e82437e14153af212006deee4a6b808091314482 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e82437e14153af212006deee4a6b808091314482 Author: Matt Turner Date: Wed Mar 25 15:03:52 2015 -0700 nir: Recognize open coded lrp. 
total instructions in shared programs: 6197614 -> 6195924 (-0.03%) instructions in affected programs: 34773 -> 33083 (-4.86%) helped: 147 HURT: 6 Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 39f37bb..368ec7f 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -76,6 +76,7 @@ optimizations = [ (('flrp', a, a, b), a), (('flrp', 0.0, a, b), ('fmul', a, b)), (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), + (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:47 -0700 (PDT) Subject: Mesa (master): nir: Recognize another open coded lrp. Message-ID: <20150401205347.CDCB97635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e26783d4459583790e50e1f59eb2507f618695c6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e26783d4459583790e50e1f59eb2507f618695c6 Author: Matt Turner Date: Thu Mar 26 10:36:42 2015 -0700 nir: Recognize another open coded lrp. 
total instructions in shared programs: 6195924 -> 6195768 (-0.00%) instructions in affected programs: 4876 -> 4720 (-3.20%) helped: 58 HURT: 10 Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 368ec7f..5b84a4e 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -77,6 +77,7 @@ optimizations = [ (('flrp', 0.0, a, b), ('fmul', a, b)), (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), + (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), # Comparison simplifications From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:47 -0700 (PDT) Subject: Mesa (master): nir: Add identities for the exponential function. Message-ID: <20150401205347.D907176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8a6ae384b2ae95f047e9d48a5a4c11002ffdd814 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a6ae384b2ae95f047e9d48a5a4c11002ffdd814 Author: Matt Turner Date: Thu Mar 26 10:16:24 2015 -0700 nir: Add identities for the exponential function. No changes in shader-db. 
Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 5b84a4e..db2fe03 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -139,6 +139,12 @@ optimizations = [ (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), (('fpow', 2.0, a), ('fexp2', a)), + (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), + (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('frcp', ('fexp', a)), ('fexp', ('fneg', a))), + (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:48 -0700 (PDT) Subject: Mesa (master): nir: Recognize (a < b || a < c) as a < max(b, c). Message-ID: <20150401205348.0858176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 97e6c1b9579573444487d0ac6d9a6b73c067b495 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=97e6c1b9579573444487d0ac6d9a6b73c067b495 Author: Matt Turner Date: Thu Mar 26 10:09:21 2015 -0700 nir: Recognize (a < b || a < c) as a < max(b, c). Doesn't work for analogous && cases, because of NaNs. 
total instructions in shared programs: 6195712 -> 6194829 (-0.01%) instructions in affected programs: 42000 -> 41117 (-2.10%) helped: 403 Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 3a3e6bf..6181f5d 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -94,6 +94,8 @@ optimizations = [ (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), (('fsat', ('fsat', a)), ('fsat', a)), (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), + (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), # Emulating booleans (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:48 -0700 (PDT) Subject: Mesa (master): nir: Remove useless ftrunc inside f2i/f2u. Message-ID: <20150401205348.1385776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 781badee7a46c7eb778fb2755d799151d8b748bf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=781badee7a46c7eb778fb2755d799151d8b748bf Author: Matt Turner Date: Thu Mar 26 10:09:42 2015 -0700 nir: Remove useless ftrunc inside f2i/f2u. No shader-db changes, probably because they're all removed by the GLSL compiler optimization added in commit 69ad5fd4. 
Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 6181f5d..60d1160 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -181,6 +181,10 @@ optimizations = [ (('bcsel', a, b, b), b), (('fcsel', a, b, b), b), + # Conversions + (('f2i', ('ftrunc', a)), ('f2i', a)), + (('f2u', ('ftrunc', a)), ('f2u', a)), + # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:48 -0700 (PDT) Subject: Mesa (master): i965/fs: Relax type check in cmod propagation. Message-ID: <20150401205348.1F10F76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a03d0ba78f33e43df2ca7a9e4c58fdc9fbc9876a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a03d0ba78f33e43df2ca7a9e4c58fdc9fbc9876a Author: Matt Turner Date: Thu Mar 26 10:09:54 2015 -0700 i965/fs: Relax type check in cmod propagation. The thing we want to avoid is int/float comparisons, but int/unsigned comparisons with 0 are equivalent. 
total instructions in shared programs: 6194829 -> 6193996 (-0.01%) instructions in affected programs: 117192 -> 116359 (-0.71%) helped: 471 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp index 798fef3..469f2ea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp @@ -111,7 +111,9 @@ opt_cmod_propagation_local(bblock_t *block) break; /* Comparisons operate differently for ints and floats */ - if (scan_inst->dst.type != inst->dst.type) + if (scan_inst->dst.type != inst->dst.type && + (scan_inst->dst.type == BRW_REGISTER_TYPE_F || + inst->dst.type == BRW_REGISTER_TYPE_F)) break; /* If the instruction generating inst's source also wrote the From mattst88 at kemper.freedesktop.org Wed Apr 1 20:53:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Wed, 1 Apr 2015 13:53:47 -0700 (PDT) Subject: Mesa (master): nir: Add addition/multiplication identities of exp/log. Message-ID: <20150401205347.EFAD076332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a2b6e908cfa1b85bc3eed9d31869ec3768b3daa2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2b6e908cfa1b85bc3eed9d31869ec3768b3daa2 Author: Matt Turner Date: Thu Mar 26 11:08:01 2015 -0700 nir: Add addition/multiplication identities of exp/log. 
instructions in affected programs: 2858 -> 2808 (-1.75%) helped: 12 Reviewed-by: Eric Anholt --- src/glsl/nir/nir_opt_algebraic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 185c291..3a3e6bf 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -153,6 +153,12 @@ optimizations = [ (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))), (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))), + (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))), + (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))), + (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))), # Division and reciprocal (('fdiv', 1.0, a), ('frcp', a)), (('frcp', ('frcp', a)), a), From marcheu at kemper.freedesktop.org Thu Apr 2 03:13:42 2015 From: marcheu at kemper.freedesktop.org (Stephane Marchesin) Date: Wed, 1 Apr 2015 20:13:42 -0700 (PDT) Subject: Mesa (master): i915g: Implement EGL_EXT_image_dma_buf_import Message-ID: <20150402031342.3A0AB76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 70eed78cacd711e663068e78a8430372cc5fabf1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=70eed78cacd711e663068e78a8430372cc5fabf1 Author: Stéphane Marchesin Date: Wed Apr 1 20:00:08 2015 -0700 i915g: Implement EGL_EXT_image_dma_buf_import This adds all the plumbing to get EGL_EXT_image_dma_buf_import in i915g. 
Signed-off-by: Stéphane Marchesin --- .../auxiliary/target-helpers/inline_drm_helper.h | 2 +- src/gallium/drivers/i915/i915_resource_texture.c | 2 +- src/gallium/drivers/i915/i915_winsys.h | 1 + src/gallium/targets/pipe-loader/pipe_i915.c | 25 +++++++++++++++++++- src/gallium/winsys/i915/drm/i915_drm_buffer.c | 15 ++++++++---- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h index 54c1c6c..542ad43 100644 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h @@ -430,7 +430,7 @@ dd_configuration(enum drm_conf conf) #if defined(GALLIUM_I915) if (strcmp(driver_name, "i915") == 0) - return NULL; + return configuration_query(conf); else #endif #if defined(GALLIUM_ILO) diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index 36fb3e2..dc8f4d1c9 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -989,7 +989,7 @@ i915_texture_from_handle(struct pipe_screen * screen, assert(screen); - buffer = iws->buffer_from_handle(iws, whandle, &tiling, &stride); + buffer = iws->buffer_from_handle(iws, whandle, template->height0, &tiling, &stride); /* Only supports one type */ if ((template->target != PIPE_TEXTURE_2D && diff --git a/src/gallium/drivers/i915/i915_winsys.h b/src/gallium/drivers/i915/i915_winsys.h index 6cf802f..509e6cc 100644 --- a/src/gallium/drivers/i915/i915_winsys.h +++ b/src/gallium/drivers/i915/i915_winsys.h @@ -176,6 +176,7 @@ struct i915_winsys { struct i915_winsys_buffer * (*buffer_from_handle)(struct i915_winsys *iws, struct winsys_handle *whandle, + unsigned height, enum i915_winsys_buffer_tile *tiling, unsigned *stride); diff --git a/src/gallium/targets/pipe-loader/pipe_i915.c b/src/gallium/targets/pipe-loader/pipe_i915.c index 
85662cb..b0da613 100644 --- a/src/gallium/targets/pipe-loader/pipe_i915.c +++ b/src/gallium/targets/pipe-loader/pipe_i915.c @@ -23,5 +23,28 @@ create_screen(int fd) return screen; } +static const struct drm_conf_ret throttle_ret = { + .type = DRM_CONF_INT, + .val.val_int = 2, +}; + +static const struct drm_conf_ret share_fd_ret = { + .type = DRM_CONF_BOOL, + .val.val_int = true, +}; + +static const struct drm_conf_ret *drm_configuration(enum drm_conf conf) +{ + switch (conf) { + case DRM_CONF_THROTTLE: + return &throttle_ret; + case DRM_CONF_SHARE_FD: + return &share_fd_ret; + default: + break; + } + return NULL; +} + PUBLIC -DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen, NULL) +DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen, drm_configuration) diff --git a/src/gallium/winsys/i915/drm/i915_drm_buffer.c b/src/gallium/winsys/i915/drm/i915_drm_buffer.c index 38e0619..c069852 100644 --- a/src/gallium/winsys/i915/drm/i915_drm_buffer.c +++ b/src/gallium/winsys/i915/drm/i915_drm_buffer.c @@ -1,4 +1,3 @@ - #include "state_tracker/drm_driver.h" #include "i915_drm_winsys.h" #include "util/u_memory.h" @@ -72,7 +71,7 @@ i915_drm_buffer_create_tiled(struct i915_winsys *iws, buf->bo = drm_intel_bo_alloc_tiled(idws->gem_manager, i915_drm_type_to_name(type), - *stride, height, 1, + *stride, height, 1, &tiling_mode, &pitch, 0); if (!buf->bo) @@ -91,6 +90,7 @@ err: static struct i915_winsys_buffer * i915_drm_buffer_from_handle(struct i915_winsys *iws, struct winsys_handle *whandle, + unsigned height, enum i915_winsys_buffer_tile *tiling, unsigned *stride) { @@ -98,7 +98,7 @@ i915_drm_buffer_from_handle(struct i915_winsys *iws, struct i915_drm_buffer *buf; uint32_t tile = 0, swizzle = 0; - if (whandle->type != DRM_API_HANDLE_TYPE_SHARED) + if ((whandle->type != DRM_API_HANDLE_TYPE_SHARED) && (whandle->type != DRM_API_HANDLE_TYPE_FD)) return NULL; buf = CALLOC_STRUCT(i915_drm_buffer); @@ -106,7 +106,14 @@ i915_drm_buffer_from_handle(struct i915_winsys *iws, return NULL; 
buf->magic = 0xDEAD1337; - buf->bo = drm_intel_bo_gem_create_from_name(idws->gem_manager, "gallium3d_from_handle", whandle->handle); + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) + buf->bo = drm_intel_bo_gem_create_from_name(idws->gem_manager, "gallium3d_from_handle", whandle->handle); + else if (whandle->type == DRM_API_HANDLE_TYPE_FD) { + int fd = (int) whandle->handle; + buf->bo = drm_intel_bo_gem_create_from_prime(idws->gem_manager, fd, height * whandle->stride); + } + buf->flinked = TRUE; buf->flink = whandle->handle; From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno: add core infrastructure support for MRTs Message-ID: <20150402041200.30457760E6@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 602bc6c88dbfa34083aa9d229fb6396b008e23eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=602bc6c88dbfa34083aa9d229fb6396b008e23eb Author: Ilia Mirkin Date: Sun Mar 29 20:39:48 2015 -0400 freedreno: add core infrastructure support for MRTs Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/freedreno_context.c | 6 ++++-- src/gallium/drivers/freedreno/freedreno_context.h | 2 +- src/gallium/drivers/freedreno/freedreno_draw.c | 7 +++++-- src/gallium/drivers/freedreno/freedreno_gmem.c | 7 ++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 79a27fe..bb1b527 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -95,6 +95,7 @@ fd_context_render(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + int i; DBG("needs_flush: %d", ctx->needs_flush); @@ -116,8 +117,9 @@ fd_context_render(struct pipe_context *pctx) ctx->gmem_reason = 0; 
ctx->num_draws = 0; - if (pfb->cbufs[0]) - fd_resource(pfb->cbufs[0]->texture)->dirty = false; + for (i = 0; i < pfb->nr_cbufs; i++) + if (pfb->cbufs[i]) + fd_resource(pfb->cbufs[i]->texture)->dirty = false; if (pfb->zsbuf) fd_resource(pfb->zsbuf->texture)->dirty = false; } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index bf9abaf..244d527 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -189,7 +189,7 @@ struct fd_context { */ enum { /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */ - FD_BUFFER_COLOR = PIPE_CLEAR_COLOR0, + FD_BUFFER_COLOR = PIPE_CLEAR_COLOR, FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH, FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL, FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL, diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index 213bad8..423ae23 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -92,7 +92,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) surf = pfb->cbufs[i]->texture; fd_resource(surf)->dirty = true; - buffers |= FD_BUFFER_COLOR; + buffers |= PIPE_CLEAR_COLOR0 << i; if (surf->nr_samples > 1) ctx->gmem_reason |= FD_GMEM_MSAA_ENABLED; @@ -147,6 +147,7 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); unsigned cleared_buffers; + int i; /* for bookkeeping about which buffers have been cleared (and thus * can fully or partially skip mem2gmem) we need to ignore buffers @@ -173,7 +174,9 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, ctx->needs_flush = true; if (buffers & PIPE_CLEAR_COLOR) - fd_resource(pfb->cbufs[0]->texture)->dirty = true; + for (i = 0; i < pfb->nr_cbufs; i++) 
+ if (buffers & (PIPE_CLEAR_COLOR0 << i)) + fd_resource(pfb->cbufs[i]->texture)->dirty = true; if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { fd_resource(pfb->zsbuf->texture)->dirty = true; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 4040d1f..afe088a 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -319,7 +319,7 @@ void fd_gmem_render_tiles(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - uint32_t timestamp = 0; + uint32_t i, timestamp = 0; bool sysmem = false; if (ctx->emit_sysmem_prep) { @@ -373,8 +373,9 @@ fd_gmem_render_tiles(struct fd_context *ctx) /* update timestamps on render targets: */ timestamp = fd_ringbuffer_timestamp(ctx->ring); - if (pfb->cbufs[0]) - fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; + for (i = 0; i < pfb->nr_cbufs; i++) + if (pfb->cbufs[i]) + fd_resource(pfb->cbufs[i]->texture)->timestamp = timestamp; if (pfb->zsbuf) fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno: convert blit program to array for each number of rts Message-ID: <20150402041200.47AE676332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6f4c1976f4e5ecdebfe5b9ac16b6d13a5e60eed1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f4c1976f4e5ecdebfe5b9ac16b6d13a5e60eed1 Author: Ilia Mirkin Date: Wed Apr 1 01:14:39 2015 -0400 freedreno: convert blit program to array for each number of rts Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 2 +- src/gallium/drivers/freedreno/a2xx/fd2_program.c | 4 +- src/gallium/drivers/freedreno/a2xx/fd2_screen.c | 1 + src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 2 +- 
src/gallium/drivers/freedreno/a3xx/fd3_screen.c | 1 + src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_screen.c | 1 + src/gallium/drivers/freedreno/freedreno_context.h | 2 +- src/gallium/drivers/freedreno/freedreno_program.c | 46 +++++++++++++++------ src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- src/gallium/drivers/freedreno/freedreno_screen.h | 1 + 12 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c index 8593c4a..982c9c2 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c @@ -255,7 +255,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); OUT_RING(ring, 0x0000003b); - fd2_program_emit(ring, &ctx->blit_prog); + fd2_program_emit(ring, &ctx->blit_prog[0]); OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/src/gallium/drivers/freedreno/a2xx/fd2_program.c index cb6281b..5ccfd58 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c @@ -474,6 +474,6 @@ fd2_prog_init(struct pipe_context *pctx) ctx->solid_prog.fp = create_solid_fp(); ctx->solid_prog.vp = create_solid_vp(); - ctx->blit_prog.fp = create_blit_fp(); - ctx->blit_prog.vp = create_blit_vp(); + ctx->blit_prog[0].fp = create_blit_fp(); + ctx->blit_prog[0].vp = create_blit_vp(); } diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c index 1801d95..c2baa6f 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_screen.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_screen.c @@ -104,6 +104,7 @@ fd2_screen_is_format_supported(struct pipe_screen 
*pscreen, void fd2_screen_init(struct pipe_screen *pscreen) { + fd_screen(pscreen)->max_rts = 1; pscreen->context_create = fd2_context_create; pscreen->is_format_supported = fd2_screen_is_format_supported; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 8ec28d9..304fc84 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -467,7 +467,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd3_emit emit = { .vtx = &fd3_ctx->blit_vbuf_state, - .prog = &ctx->blit_prog, + .prog = &ctx->blit_prog[0], .sprite_coord_enable = 1, .key = { .half_precision = fd3_half_precision(format), diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c index 5fc63e8..182db84 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -103,6 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen, void fd3_screen_init(struct pipe_screen *pscreen) { + fd_screen(pscreen)->max_rts = 1; pscreen->context_create = fd3_context_create; pscreen->is_format_supported = fd3_screen_is_format_supported; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c index 2c57995..9a90506 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c @@ -282,7 +282,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd4_emit emit = { .vtx = &fd4_ctx->blit_vbuf_state, - .prog = &ctx->blit_prog, + .prog = &ctx->blit_prog[0], .key = key, .format = fd4_emit_format(pfb->cbufs[0]), }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c 
index 9ee47fb..015f6c8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -497,7 +497,7 @@ static void fix_blit_fp(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); - struct fd4_shader_stateobj *so = ctx->blit_prog.fp; + struct fd4_shader_stateobj *so = ctx->blit_prog[0].fp; so->shader->vpsrepl[0] = 0x99999999; so->shader->vpsrepl[1] = 0x99999999; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c index cf697d4..f5b4668 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c @@ -100,6 +100,7 @@ fd4_screen_is_format_supported(struct pipe_screen *pscreen, void fd4_screen_init(struct pipe_screen *pscreen) { + fd_screen(pscreen)->max_rts = 1; pscreen->context_create = fd4_context_create; pscreen->is_format_supported = fd4_screen_is_format_supported; } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 244d527..7b0424e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -176,7 +176,7 @@ struct fd_context { struct fd_program_stateobj solid_prog; // TODO move to screen? /* shaders used by mem->gmem blits: */ - struct fd_program_stateobj blit_prog; // TODO move to screen? + struct fd_program_stateobj blit_prog[8]; // TODO move to screen? /* do we need to mem2gmem before rendering. 
We don't, if for example, * there was a glClear() that invalidated the entire previous buffer diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index 167ed02..52a165b 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -27,6 +27,7 @@ */ #include "tgsi/tgsi_text.h" +#include "tgsi/tgsi_ureg.h" #include "freedreno_program.h" #include "freedreno_context.h" @@ -64,15 +65,6 @@ static const char *solid_vp = " 0: MOV OUT[0], IN[0] \n" " 1: END \n"; -static const char *blit_fp = - "FRAG \n" - "PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n" - "DCL IN[0], TEXCOORD[0], PERSPECTIVE \n" - "DCL OUT[0], COLOR \n" - "DCL SAMP[0] \n" - " 0: TEX OUT[0], IN[0], SAMP[0], 2D \n" - " 1: END \n"; - static const char *blit_vp = "VERT \n" "DCL IN[0] \n" @@ -99,9 +91,31 @@ static void * assemble_tgsi(struct pipe_context *pctx, return pctx->create_vs_state(pctx, &cso); } +static void * +fd_prog_blit(struct pipe_context *pctx, int rts) +{ + int i; + struct ureg_src tc; + struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); + if (!ureg) + return NULL; + + tc = ureg_DECL_fs_input( + ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_PERSPECTIVE); + for (i = 0; i < rts; i++) + ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i), + TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i)); + + ureg_END(ureg); + + return ureg_create_shader_and_destroy(ureg, pctx); +} + + void fd_prog_init(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); + int i; pctx->bind_fs_state = fd_fp_state_bind; pctx->bind_vs_state = fd_vp_state_bind; @@ -113,16 +127,22 @@ void fd_prog_init(struct pipe_context *pctx) ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true); ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false); - ctx->blit_prog.fp = assemble_tgsi(pctx, blit_fp, true); - ctx->blit_prog.vp = assemble_tgsi(pctx, blit_vp, false); + 
ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false); + ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1); + for (i = 1; i < ctx->screen->max_rts; i++) { + ctx->blit_prog[i].vp = ctx->blit_prog[0].vp; + ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1); + } } void fd_prog_fini(struct pipe_context *pctx) { struct fd_context *ctx = fd_context(pctx); + int i; pctx->delete_vs_state(pctx, ctx->solid_prog.vp); pctx->delete_fs_state(pctx, ctx->solid_prog.fp); - pctx->delete_vs_state(pctx, ctx->blit_prog.vp); - pctx->delete_fs_state(pctx, ctx->blit_prog.fp); + pctx->delete_vs_state(pctx, ctx->blit_prog[0].vp); + for (i = 0; i < ctx->screen->max_rts; i++) + pctx->delete_fs_state(pctx, ctx->blit_prog[i].fp); } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index bb48802..fe72444 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -252,7 +252,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Render targets. */ case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + return screen->max_rts; /* Queries. 
*/ case PIPE_CAP_QUERY_TIME_ELAPSED: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index e1c554c..3b470d1 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -46,6 +46,7 @@ struct fd_screen { uint32_t device_id; uint32_t gpu_id; /* 220, 305, etc */ uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ + uint32_t max_rts; struct fd_device *dev; struct fd_pipe *pipe; From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno: remove alpha key from ir3_shader Message-ID: <20150402041200.14D4776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8efa3e340d13a9f373e7b2834f12d9fae43e6867 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8efa3e340d13a9f373e7b2834f12d9fae43e6867 Author: Ilia Mirkin Date: Sun Mar 29 19:59:38 2015 -0400 freedreno: remove alpha key from ir3_shader This complication is unnecessary and makes MRTs more complicated and likely to generate tons of variants. 
Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 4 ---- src/gallium/drivers/freedreno/a3xx/fd3_program.c | 3 +++ src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 5 +---- src/gallium/drivers/freedreno/a4xx/fd4_emit.h | 1 + src/gallium/drivers/freedreno/a4xx/fd4_program.c | 3 +++ src/gallium/drivers/freedreno/ir3/ir3_cmdline.c | 8 -------- src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 17 ----------------- src/gallium/drivers/freedreno/ir3/ir3_shader.c | 1 - src/gallium/drivers/freedreno/ir3/ir3_shader.h | 8 -------- 9 files changed, 8 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index a3f9549..6ff762e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -121,9 +121,6 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->half_precision != key->half_precision) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; - if (last_key->alpha != key->alpha) - ctx->prog.dirty |= FD_SHADER_DIRTY_FP; - fd3_ctx->last_key = *key; } } @@ -141,7 +138,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) /* do binning pass first: */ .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, - .alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])), // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. 
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 1250dff..442b47d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -202,6 +202,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) color_regid = ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + if (util_format_is_alpha(emit->format)) + color_regid += 3; + /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 57f2574..6c54f61 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -97,9 +97,6 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->half_precision != key->half_precision) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; - if (last_key->alpha != key->alpha) - ctx->prog.dirty |= FD_SHADER_DIRTY_FP; - if (last_key->rasterflat != key->rasterflat) ctx->prog.dirty |= FD_SHADER_DIRTY_FP; @@ -120,7 +117,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) /* do binning pass first: */ .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, - .alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])), .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. 
@@ -134,6 +130,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .fsaturate_r = fd4_ctx->fsaturate_r, }, .format = fd4_emit_format(pfb->cbufs[0]), + .pformat = pipe_surface_format(pfb->cbufs[0]), }; unsigned dirty; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h index 5dc3db8..7d059f8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h @@ -54,6 +54,7 @@ struct fd4_emit { const struct pipe_draw_info *info; struct ir3_shader_key key; enum a4xx_color_fmt format; + enum pipe_format pformat; uint32_t dirty; /* cached to avoid repeated lookups of same variants: */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 776e4a1..9ee47fb 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -218,6 +218,9 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) color_regid = ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + if (util_format_is_alpha(emit->pformat)) + color_regid += 3; + /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? 
regid(0,0) : regid(63,0); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index bf6bcb8..11bfe34 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -225,7 +225,6 @@ static void print_usage(void) printf(" --binning-pass - generate binning pass shader (VERT)\n"); printf(" --color-two-side - emulate two-sided color (FRAG)\n"); printf(" --half-precision - use half-precision\n"); - printf(" --alpha - generate render-to-alpha shader (FRAG)\n"); printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n"); printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n"); printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n"); @@ -282,13 +281,6 @@ int main(int argc, char **argv) continue; } - if (!strcmp(argv[n], "--alpha")) { - debug_printf(" %s", argv[n]); - key.alpha = true; - n++; - continue; - } - if (!strcmp(argv[n], "--saturate-s")) { debug_printf(" %s %s", argv[n], argv[n+1]); key.vsaturate_s = key.fsaturate_s = strtol(argv[n+1], NULL, 0); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index f6bdc06..c600252 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -3496,23 +3496,6 @@ ir3_compile_shader(struct ir3_shader_variant *so, block->noutputs = j * 4; } - /* for rendering to alpha format, we only need the .w component, - * and we need it to be in the .x position: - */ - if (key.alpha) { - for (i = 0, j = 0; i < so->outputs_count; i++) { - unsigned name = sem2name(so->outputs[i].semantic); - - /* move .w component to .x and discard others: */ - if (name == TGSI_SEMANTIC_COLOR) { - block->outputs[(i*4)+0] = block->outputs[(i*4)+3]; - block->outputs[(i*4)+1] = NULL; - block->outputs[(i*4)+2] = NULL; - block->outputs[(i*4)+3] = NULL; - } - } - } - /* if we want 
half-precision outputs, mark the output registers * as half: */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 122a447..b1dff38 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -236,7 +236,6 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) case SHADER_VERTEX: key.color_two_side = false; key.half_precision = false; - key.alpha = false; key.rasterflat = false; if (key.has_per_samp) { key.fsaturate_s = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 7f38067..4b7d038 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -69,14 +69,6 @@ struct ir3_shader_key { */ unsigned color_two_side : 1; unsigned half_precision : 1; - /* For rendering to alpha, we need a bit of special handling - * since the hw always takes gl_FragColor starting from x - * component, rather than figuring out to take the w component. - * We could be more clever and generate variants for other - * render target formats (ie. 
luminance formats are xxx1), but - * let's start with this and see how it goes: - */ - unsigned alpha : 1; /* used when shader needs to handle flat varyings (a4xx), * for TGSI_INTERPOLATE_COLOR: */ From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add independent blend function support Message-ID: <20150402041200.1832B760E6@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f27ec5908416e6a3ea3d770026cd6a1c6ded188f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f27ec5908416e6a3ea3d770026cd6a1c6ded188f Author: Ilia Mirkin Date: Sun Mar 29 20:04:38 2015 -0400 freedreno/a3xx: add independent blend function support This is needed for MRT support Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_blend.c | 11 +++++------ src/gallium/drivers/freedreno/freedreno_screen.c | 6 ++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c index 9229556..6f5de9d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c @@ -86,11 +86,6 @@ fd3_blend_state_create(struct pipe_context *pctx, } } - if (cso->independent_blend_enable) { - DBG("Unsupported! 
independent blend state"); - return NULL; - } - so = CALLOC_STRUCT(fd3_blend_stateobj); if (!so) return NULL; @@ -98,7 +93,11 @@ fd3_blend_state_create(struct pipe_context *pctx, so->base = *cso; for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { - const struct pipe_rt_blend_state *rt = &cso->rt[i]; + const struct pipe_rt_blend_state *rt; + if (cso->independent_blend_enable) + rt = &cso->rt[i]; + else + rt = &cso->rt[0]; so->rb_mrt[i].blend_control_rgb = A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 68c8105..bb48802 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -179,6 +179,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + return is_a3xx(screen); + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; @@ -188,8 +192,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120; /* Unsupported features. 
*/ - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno/ir3: add support for FS_COLOR0_WRITES_ALL_CBUFS property Message-ID: <20150402041200.248727635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d13803c76fd7429df64c1aa3631dcc451e7f1a29 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d13803c76fd7429df64c1aa3631dcc451e7f1a29 Author: Ilia Mirkin Date: Sun Mar 29 20:24:57 2015 -0400 freedreno/ir3: add support for FS_COLOR0_WRITES_ALL_CBUFS property This will enable the driver to tell which regids to link up to which MRT outputs. Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 9 +++++++++ src/gallium/drivers/freedreno/ir3/ir3_shader.h | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index c600252..511cf77 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -3412,6 +3412,15 @@ compile_instructions(struct ir3_compile_context *ctx) break; } + case TGSI_TOKEN_TYPE_PROPERTY: { + struct tgsi_full_property *prop = + &ctx->parser.FullToken.FullProperty; + switch (prop->Property.PropertyName) { + case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: + ctx->so->color0_mrt = !!prop->u[0].Data; + break; + } + } default: break; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 4b7d038..e5410bf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -133,7 +133,7 @@ struct ir3_shader_variant { * 
to bary.f instructions */ uint8_t pos_regid; - bool frag_coord, frag_face; + bool frag_coord, frag_face, color0_mrt; /* varyings/outputs: */ unsigned outputs_count; From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno: add support for laying out MRTs in gmem Message-ID: <20150402041200.3AE167635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d9992ab35a51c574dcfa8049859c0887956ecdf6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9992ab35a51c574dcfa8049859c0887956ecdf6 Author: Ilia Mirkin Date: Sun Mar 29 20:54:42 2015 -0400 freedreno: add support for laying out MRTs in gmem Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/freedreno_gmem.c | 53 +++++++++++++++++------- src/gallium/drivers/freedreno/freedreno_gmem.h | 6 ++- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index afe088a..6bdd770 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -76,6 +76,27 @@ static uint32_t bin_width(struct fd_context *ctx) return 512; } +static uint32_t +total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp, + uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) +{ + uint32_t total = 0, i; + + for (i = 0; i < 4; i++) { + if (cbuf_cpp[i]) { + gmem->cbuf_base[i] = align(total, 0x4000); + total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h; + } + } + + if (zsbuf_cpp) { + gmem->zsbuf_base = align(total, 0x4000); + total = gmem->zsbuf_base + zsbuf_cpp * bin_w * bin_h; + } + + return total; +} + static void calculate_tiles(struct fd_context *ctx) { @@ -87,26 +108,27 @@ calculate_tiles(struct fd_context *ctx) uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; uint32_t max_width = bin_width(ctx); - uint32_t cpp = 4; + uint8_t 
cbuf_cpp[4] = {0}, zsbuf_cpp = 0; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); - if (pfb->cbufs[0]) - cpp = util_format_get_blocksize(pfb->cbufs[0]->format); + if (has_zs) + zsbuf_cpp = util_format_get_blocksize(pfb->zsbuf->format); + for (i = 0; i < pfb->nr_cbufs; i++) { + if (pfb->cbufs[i]) + cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format); + else + cbuf_cpp[i] = 4; + } - if ((gmem->cpp == cpp) && (gmem->has_zs == has_zs) && - !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { + if (gmem->zsbuf_cpp == zsbuf_cpp && + !memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) && + !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { /* everything is up-to-date */ return; } - /* if have depth/stencil, we need to leave room: */ - if (has_zs) { - gmem_size /= 2; - max_width /= 2; - } - if (fd_mesa_debug & FD_DBG_NOSCIS) { minx = 0; miny = 0; @@ -133,7 +155,10 @@ calculate_tiles(struct fd_context *ctx) /* then find a bin width/height that satisfies the memory * constraints: */ - while ((bin_w * bin_h * cpp) > gmem_size) { + DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d", + cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp, + width, height); + while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { if (bin_w > bin_h) { nbins_x++; bin_w = align(width / nbins_x, 32); @@ -146,8 +171,8 @@ calculate_tiles(struct fd_context *ctx) DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h); gmem->scissor = *scissor; - gmem->cpp = cpp; - gmem->has_zs = has_zs; + memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)); + gmem->zsbuf_cpp = zsbuf_cpp; gmem->bin_h = bin_h; gmem->bin_w = bin_w; gmem->nbins_x = nbins_x; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index ff322df..81f9b6a 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ 
b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -47,12 +47,14 @@ struct fd_tile { struct fd_gmem_stateobj { struct pipe_scissor_state scissor; - uint cpp; + uint32_t cbuf_base[4]; + uint32_t zsbuf_base; + uint8_t cbuf_cpp[4]; + uint8_t zsbuf_cpp; uint16_t bin_h, nbins_y; uint16_t bin_w, nbins_x; uint16_t minx, miny; uint16_t width, height; - bool has_zs; /* gmem config using depth/stencil? */ }; struct fd_context; From imirkin at kemper.freedesktop.org Thu Apr 2 04:12:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 1 Apr 2015 21:12:00 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add MRT support Message-ID: <20150402041200.50ABB76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4a3c0e995063320693782b934962969e11dab29d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a3c0e995063320693782b934962969e11dab29d Author: Ilia Mirkin Date: Sun Feb 15 03:39:43 2015 -0500 freedreno/a3xx: add MRT support The hardware only supports 4 MRTs. It should be possible to emulate support for 8, but doesn't seem worth the trouble. Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 3 +- src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 20 +-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 104 +++++++++----- src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 3 +- src/gallium/drivers/freedreno/a3xx/fd3_format.h | 8 +- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 159 +++++++++++++--------- src/gallium/drivers/freedreno/a3xx/fd3_program.c | 58 +++++--- src/gallium/drivers/freedreno/a3xx/fd3_program.h | 3 +- src/gallium/drivers/freedreno/a3xx/fd3_screen.c | 2 +- 9 files changed, 221 insertions(+), 139 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 3233637..22201e1 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -45,11 +45,12 @@ Note: some of the new features are only available with certain drivers.
    • GL_AMD_pinned_memory on r600, radeonsi
+   • GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
    • GL_ARB_draw_instanced on freedreno
    • GL_ARB_gpu_shader_fp64 on nvc0, softpipe
    • GL_ARB_instanced_arrays on freedreno
    • GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe
-   • GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600
+   • GL_EXT_draw_buffers2 on freedreno

    Bug fixes

    diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 6ff762e..044355c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -129,7 +129,6 @@ static void fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) { struct fd3_context *fd3_ctx = fd3_context(ctx); - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd3_emit emit = { .vtx = &ctx->vtx, .prog = &ctx->prog, @@ -152,7 +151,6 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .vinteger_s = fd3_ctx->vinteger_s, .finteger_s = fd3_ctx->finteger_s, }, - .format = pipe_surface_format(pfb->cbufs[0]), .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0, }; @@ -239,17 +237,18 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, { struct fd3_context *fd3_ctx = fd3_context(ctx); struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; - unsigned ce, i; + unsigned i; struct fd3_emit emit = { .vtx = &fd3_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = fd3_half_precision(format), + .half_precision = (fd3_half_precision(pfb->cbufs[0]) && + fd3_half_precision(pfb->cbufs[1]) && + fd3_half_precision(pfb->cbufs[2]) && + fd3_half_precision(pfb->cbufs[3])), }, - .format = format, }; dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; @@ -326,17 +325,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); } - if (buffers & PIPE_CLEAR_COLOR) { - ce = 0xf; - } else { - ce = 0x0; - } - for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) | A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) | - 
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); + COND(buffers & (PIPE_CLEAR_COLOR0 << i), + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf))); OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index a5874e4..1b656b7 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -293,59 +293,92 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, * case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil. */ void -fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf) +fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, + struct pipe_surface **psurf, + int bufs) { - struct fd_resource *rsc = fd_resource(psurf->texture); - unsigned lvl = psurf->u.tex.level; - struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); - uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer); - enum pipe_format format = fd3_gmem_restore_format(psurf->format); - - debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + int i, j; /* output sampler state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 4); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | - A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | - A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | - A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | - A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); - OUT_RING(ring, 0x00000000); + for (i = 0; i < bufs; i++) { + OUT_RING(ring, 
A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); + OUT_RING(ring, 0x00000000); + } /* emit texture state: */ - OUT_PKT3(ring, CP_LOAD_STATE, 6); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) | - A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | - fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, - PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); - OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | - A3XX_TEX_CONST_1_WIDTH(psurf->width) | - A3XX_TEX_CONST_1_HEIGHT(psurf->height)); - OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | - A3XX_TEX_CONST_2_INDX(0)); - OUT_RING(ring, 0x00000000); + for (i = 0; i < bufs; i++) { + if (!psurf[i]) { + OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | + A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) | + A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE)); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); + OUT_RING(ring, 0x00000000); + continue; + } + + struct fd_resource *rsc = fd_resource(psurf[i]->texture); + unsigned lvl = psurf[i]->u.tex.level; + struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); + enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); + + debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer); + + OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) | + A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) | + fd3_tex_swiz(format, 
PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA)); + OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) | + A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) | + A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height)); + OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) | + A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); + OUT_RING(ring, 0x00000000); + } /* emit mipaddrs: */ - OUT_PKT3(ring, CP_LOAD_STATE, 3); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) | - CP_LOAD_STATE_0_NUM_UNIT(1)); + CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs)); OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); - OUT_RELOC(ring, rsc->bo, offset, 0, 0); + for (i = 0; i < bufs; i++) { + if (psurf[i]) { + struct fd_resource *rsc = fd_resource(psurf[i]->texture); + unsigned lvl = psurf[i]->u.tex.level; + uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer); + OUT_RELOC(ring, rsc->bo, offset, 0, 0); + } else { + OUT_RING(ring, 0x00000000); + } + + /* pad the remaining entries w/ null: */ + for (j = 1; j < BASETABLE_SZ; j++) { + OUT_RING(ring, 0x00000000); + } + } } void @@ -570,8 +603,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); } - if (dirty & FD_DIRTY_PROG) - fd3_program_emit(ring, emit); + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) { + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs); + } /* TODO we should not need this or fd_wfi() before emit_constants(): */ @@ -624,6 +659,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); } + if (format == PIPE_FORMAT_NONE) + control &= 
~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; + if (has_alpha) { blend_control |= blend->rb_mrt[i].blend_control_rgb; } else { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index ce51c0c..a438dda 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -45,7 +45,7 @@ void fd3_emit_constant(struct fd_ringbuffer *ring, const uint32_t *dwords, struct pipe_resource *prsc); void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, - struct pipe_surface *psurf); + struct pipe_surface **psurf, int bufs); /* grouped together emit-state for prog/vertex/state emit: */ struct fd3_emit { @@ -53,7 +53,6 @@ struct fd3_emit { const struct fd_program_stateobj *prog; const struct pipe_draw_info *info; struct ir3_shader_key key; - enum pipe_format format; uint32_t dirty; uint32_t sprite_coord_enable; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h index 6a47fda..6afc301 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h @@ -42,8 +42,14 @@ uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); static INLINE bool -fd3_half_precision(enum pipe_format format) +fd3_half_precision(const struct pipe_surface *surface) { + enum pipe_format format; + if (!surface) + return true; + + format = surface->format; + /* colors are provided in consts, which go through cov.f32f16, which will * break these values */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 304fc84..8589dd6 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -89,6 +89,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } else { stride = slice->pitch * rsc->cpp; } + } else if (i < 
nr_bufs && bases) { + base = bases[i]; } OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); @@ -97,7 +99,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB)); - if (bin_w || (i >= nr_bufs)) { + if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); } else { OUT_RELOCW(ring, rsc->bo, offset, 0, -1); @@ -110,20 +112,6 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, } } -static uint32_t -depth_base(struct fd_context *ctx) -{ - struct fd_gmem_stateobj *gmem = &ctx->gmem; - struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - uint32_t cpp = 4; - if (pfb->cbufs[0]) { - struct fd_resource *rsc = - fd_resource(pfb->cbufs[0]->texture); - cpp = rsc->cpp; - } - return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000); -} - static bool use_hw_binning(struct fd_context *ctx) { @@ -167,7 +155,8 @@ emit_binning_workaround(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) | A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); @@ -189,7 +178,7 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); @@ -338,15 +327,14 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = 
pipe_surface_format(pfb->cbufs[0]); struct fd3_emit emit = { .vtx = &fd3_ctx->solid_vbuf_state, .prog = &ctx->solid_prog, .key = { - .half_precision = fd3_half_precision(format), + .half_precision = true, }, - .format = format, }; + int i; OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); @@ -388,7 +376,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | @@ -419,21 +408,28 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); + fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - uint32_t base = depth_base(ctx); - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); - } + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, + ctx->gmem.zsbuf_base, pfb->zsbuf); if (ctx->resolve & FD_BUFFER_COLOR) { - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, 0, pfb->cbufs[0]); + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, + ctx->gmem.cbuf_base[i], pfb->cbufs[i]); + } } OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); OUT_PKT0(ring, 
REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | @@ -444,14 +440,24 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) /* transfer from system memory to gmem */ static void -emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base, - struct pipe_surface *psurf, uint32_t bin_w) +emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], + struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; - emit_mrt(ring, 1, &psurf, &base, bin_w); + assert(bufs > 0); + + emit_mrt(ring, bufs, psurf, bases, bin_w); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); + + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); - fd3_emit_gmem_restore_tex(ring, psurf); + fd3_emit_gmem_restore_tex(ring, psurf, bufs); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); @@ -464,15 +470,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) struct fd_gmem_stateobj *gmem = &ctx->gmem; struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); struct fd3_emit emit = { .vtx = &fd3_ctx->blit_vbuf_state, - .prog = &ctx->blit_prog[0], .sprite_coord_enable = 1, + /* NOTE: They all use the same VP, this is for vtx bufs. 
*/ + .prog = &ctx->blit_prog[0], .key = { - .half_precision = fd3_half_precision(format), + .half_precision = (fd3_half_precision(pfb->cbufs[0]) && + fd3_half_precision(pfb->cbufs[1]) && + fd3_half_precision(pfb->cbufs[2]) && + fd3_half_precision(pfb->cbufs[3])), }, - .format = format, }; float x0, y0, x1, y1; unsigned bin_w = tile->bin_w; @@ -515,6 +523,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ @@ -567,7 +579,6 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ - fd3_program_emit(ring, &emit); fd3_emit_vertex_bufs(ring, &emit); /* for gmem pitch/base calculations, we need to use the non- @@ -576,16 +587,27 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) bin_w = gmem->bin_w; bin_h = gmem->bin_h; - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) { + emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1]; + fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs); + emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w); + } - if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) - emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w); + if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + emit.prog = &ctx->blit_prog[0]; + fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); + emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, 
A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } static void @@ -617,12 +639,13 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) { struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct fd_ringbuffer *ring = ctx->ring; - uint32_t pitch = 0; + uint32_t i, pitch = 0; - if (pfb->cbufs[0]) { - struct pipe_surface *psurf = pfb->cbufs[0]; - unsigned lvl = psurf->u.tex.level; - pitch = fd_resource(psurf->texture)->slices[lvl].pitch; + for (i = 0; i < pfb->nr_cbufs; i++) { + struct pipe_surface *psurf = pfb->cbufs[i]; + if (!psurf) + continue; + pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch; } fd3_emit_restore(ctx); @@ -647,7 +670,8 @@ fd3_emit_sysmem_prep(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_RB_MODE_CONTROL_GMEM_BYPASS | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); patch_draws(ctx, IGNORE_VISIBILITY); patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch)); @@ -734,7 +758,8 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(0)); for (i = 0; i < 4; i++) { OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); @@ -774,7 +799,8 @@ emit_binning_pass(struct fd_context *ctx) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - 
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1)); OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) | A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); @@ -848,21 +874,6 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) { struct fd_ringbuffer *ring = ctx->ring; struct pipe_framebuffer_state *pfb = &ctx->framebuffer; - struct fd_gmem_stateobj *gmem = &ctx->gmem; - uint32_t reg; - - OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); - reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)); - if (pfb->zsbuf) { - reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); - } - OUT_RING(ring, reg); - if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); - } else { - OUT_RING(ring, 0x00000000); - } if (ctx->needs_rb_fbd) { fd_wfi(ctx, ring); @@ -874,7 +885,8 @@ fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | - A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | + A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1)); } /* before IB to rendering cmds: */ @@ -891,6 +903,21 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t x2 = tile->xoff + tile->bin_w - 1; uint32_t y2 = tile->yoff + tile->bin_h - 1; + uint32_t reg; + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base); + if (pfb->zsbuf) { + reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + } + OUT_RING(ring, reg); + if (pfb->zsbuf) { + uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); + } else { + OUT_RING(ring, 0x00000000); + } + if 
(use_hw_binning(ctx)) { struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p]; @@ -918,7 +945,7 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); - emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w); /* setup scissor/offset for current tile: */ OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 442b47d..4581a6b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -31,8 +31,6 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" #include "freedreno_program.h" @@ -127,13 +125,14 @@ emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) } void -fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) +fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, + int nr, struct pipe_surface **bufs) { const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; - uint32_t pos_regid, posz_regid, psize_regid, color_regid; + uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; int constmode; int i, j, k; @@ -199,11 +198,26 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = ir3_find_output_regid(vp, ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); - color_regid = ir3_find_output_regid(fp, - ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + if (fp->color0_mrt) { + color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = + ir3_find_output_regid(fp, 
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + } else { + for (int i = 0; i < fp->outputs_count; i++) { + ir3_semantic sem = fp->outputs[i].semantic; + unsigned idx = sem2idx(sem); + if (sem2name(sem) != TGSI_SEMANTIC_COLOR) + continue; + assert(idx < 4); + color_regid[idx] = fp->outputs[i].regid; + } + } - if (util_format_is_alpha(emit->format)) - color_regid += 3; + /* adjust regids for alpha output formats. there is no alpha render + * format, so it's just treated like red + */ + for (i = 0; i < nr; i++) + if (util_format_is_alpha(pipe_surface_format(bufs[i]))) + color_regid[i] += 3; /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -345,21 +359,23 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) } OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); - if (fp->writes_pos) { - OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE | - A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); - } else { - OUT_RING(ring, 0x00000000); - } + OUT_RING(ring, + COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | + A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | + A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1)); OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) | - COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION) | - COND(util_format_is_pure_uint(emit->format), A3XX_SP_FS_MRT_REG_UINT) | - COND(util_format_is_pure_sint(emit->format), A3XX_SP_FS_MRT_REG_SINT)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); + for (i = 0; i < 4; i++) { + uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | + COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION); + + if (i < nr) { + enum pipe_format fmt = pipe_surface_format(bufs[i]); + mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | + 
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); + } + OUT_RING(ring, mrt_reg); + } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 0313b77..52c8080 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -39,7 +39,8 @@ struct fd3_shader_stateobj { struct fd3_emit; -void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit); +void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, + int nr, struct pipe_surface **bufs); void fd3_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c index 182db84..3497921 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -103,7 +103,7 @@ fd3_screen_is_format_supported(struct pipe_screen *pscreen, void fd3_screen_init(struct pipe_screen *pscreen) { - fd_screen(pscreen)->max_rts = 1; + fd_screen(pscreen)->max_rts = 4; pscreen->context_create = fd3_context_create; pscreen->is_format_supported = fd3_screen_is_format_supported; } From jekstrand at kemper.freedesktop.org Thu Apr 2 17:21:45 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Thu, 2 Apr 2015 10:21:45 -0700 (PDT) Subject: Mesa (master): nir/print: Correctly print swizzles for explicitly sized alu sources Message-ID: <20150402172145.924FA76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0573d0e4845803969634c975355bbf55651dde19 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0573d0e4845803969634c975355bbf55651dde19 Author: Jason Ekstrand Date: Wed Apr 1 16:16:52 2015 -0700 nir/print: Correctly print swizzles for explicitly sized alu sources Reviewed-by: Connor Abbott --- src/glsl/nir/nir_print.c | 24 ++++++++++++------------ 1 file 
changed, 12 insertions(+), 12 deletions(-) diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 53fada8..fb8c934 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -137,21 +137,21 @@ print_dest(nir_dest *dest, FILE *fp) } static void -print_alu_src(nir_alu_src *src, uint8_t read_mask, FILE *fp) +print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp) { - if (src->negate) + if (instr->src[src].negate) fprintf(fp, "-"); - if (src->abs) + if (instr->src[src].abs) fprintf(fp, "abs("); - print_src(&src->src, fp); + print_src(&instr->src[src].src, fp); bool print_swizzle = false; for (unsigned i = 0; i < 4; i++) { - if (read_mask >> i == 0) - break; + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; - if (src->swizzle[i] != i) { + if (instr->src[src].swizzle[i] != i) { print_swizzle = true; break; } @@ -160,14 +160,14 @@ print_alu_src(nir_alu_src *src, uint8_t read_mask, FILE *fp) if (print_swizzle) { fprintf(fp, "."); for (unsigned i = 0; i < 4; i++) { - if (read_mask >> i == 0) - break; + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; - fprintf(fp, "%c", "xyzw"[src->swizzle[i]]); + fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); } } - if (src->abs) + if (instr->src[src].abs) fprintf(fp, ")"); } @@ -201,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp) if (i != 0) fprintf(fp, ", "); - print_alu_src(&instr->src[i], instr->dest.write_mask, fp); + print_alu_src(instr, i, fp); } } From jekstrand at kemper.freedesktop.org Thu Apr 2 17:21:45 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Thu, 2 Apr 2015 10:21:45 -0700 (PDT) Subject: Mesa (master): i965/generator: Get rid of the ! 
in the unreachable statement Message-ID: <20150402172145.A182976338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e50cf5faa5709eaeea1da8759f13b140b4b3cea1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e50cf5faa5709eaeea1da8759f13b140b4b3cea1 Author: Jason Ekstrand Date: Wed Apr 1 16:18:31 2015 -0700 i965/generator: Get rid of the ! in the unreachable statement Reviewed-by: Mark Janes --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index bd12147..40e51aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1602,7 +1602,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); break; default: - unreachable(!"Invalid instruction width"); + unreachable("Invalid instruction width"); } switch (inst->opcode) { From anholt at kemper.freedesktop.org Thu Apr 2 17:33:03 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Thu, 2 Apr 2015 10:33:03 -0700 (PDT) Subject: Mesa (master): vc4: Add support for nir_iabs. Message-ID: <20150402173303.034EF76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a9152376b49d8c56debb8023cc6e93d9c071d293 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9152376b49d8c56debb8023cc6e93d9c071d293 Author: Eric Anholt Date: Wed Apr 1 15:19:38 2015 -0700 vc4: Add support for nir_iabs. Tested using the GLSL 1.30 tests for integer abs(). Not currently used, but it was one of the new opcodes used by robclark's idiv lowering. 
--- src/gallium/drivers/vc4/vc4_program.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 5ed2165..bcceb3c 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1069,9 +1069,14 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) case nir_op_fsign: *dest = ntq_fsign(c, src[0]); break; + case nir_op_fabs: *dest = qir_FMAXABS(c, src[0], src[0]); break; + case nir_op_iabs: + *dest = qir_MAX(c, src[0], + qir_SUB(c, qir_uniform_ui(c, 0), src[0])); + break; default: fprintf(stderr, "unknown NIR ALU inst: "); From imirkin at kemper.freedesktop.org Thu Apr 2 17:47:00 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Thu, 2 Apr 2015 10:47:00 -0700 (PDT) Subject: Mesa (master): mesa: add ARB_depth_buffer_float to ES3.0 required extension list Message-ID: <20150402174700.645ED76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4609ba6ea3487dd3440596c460062f03526d335c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4609ba6ea3487dd3440596c460062f03526d335c Author: Ilia Mirkin Date: Wed Apr 1 16:19:09 2015 -0400 mesa: add ARB_depth_buffer_float to ES3.0 required extension list Signed-off-by: Ilia Mirkin Reviewed-by: Matt Turner --- src/mesa/main/version.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 8e0c3ef..7c6d994 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -382,6 +382,7 @@ compute_version_es2(const struct gl_extensions *extensions) extensions->ARB_shader_texture_lod && extensions->ARB_texture_float && extensions->ARB_texture_rg && + extensions->ARB_depth_buffer_float && extensions->EXT_draw_buffers2 && /* extensions->ARB_framebuffer_object && */ extensions->EXT_framebuffer_sRGB && From jekstrand at kemper.freedesktop.org Thu Apr 2 18:09:57 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Thu, 2 
Apr 2015 11:09:57 -0700 (PDT) Subject: Mesa (master): nir/opt_peephole_ffma: Fix a couple typos in a comment Message-ID: <20150402180957.24B4176338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ca3b4d6d17a0f95b287e87888c9d893be94f0301 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ca3b4d6d17a0f95b287e87888c9d893be94f0301 Author: Jason Ekstrand Date: Thu Apr 2 10:42:12 2015 -0700 nir/opt_peephole_ffma: Fix a couple typos in a comment Acked-by: Matt Turner --- src/glsl/nir/nir_opt_peephole_ffma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c index bf4dbe1..9d5646f 100644 --- a/src/glsl/nir/nir_opt_peephole_ffma.c +++ b/src/glsl/nir/nir_opt_peephole_ffma.c @@ -101,8 +101,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs) break; case nir_op_fmul: - /* Only absorbe a fmul into a ffma if the fmul is is only used in fadd - * operations. This prevents us from being too agressive with our + /* Only absorb a fmul into a ffma if the fmul is is only used in fadd + * operations. This prevents us from being too aggressive with our * fusing which can actually lead to more instructions. */ if (!are_all_uses_fadd(&alu->dest.dest.ssa)) From kwg at kemper.freedesktop.org Thu Apr 2 22:26:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:13 -0700 (PDT) Subject: Mesa (master): ralloc: Implement a new ralloc_adopt() API. Message-ID: <20150402222613.D14D076338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 33f0f68d590a460f84a0df0de10f29c4a582d7e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33f0f68d590a460f84a0df0de10f29c4a582d7e7 Author: Kenneth Graunke Date: Fri Mar 27 19:24:33 2015 -0700 ralloc: Implement a new ralloc_adopt() API. ralloc_adopt() reparents all children from one context to another. 
Conceptually, ralloc_adopt(new_ctx, old_ctx) behaves like this pseudocode: foreach child of old_ctx: ralloc_steal(new_ctx, child) However, ralloc provides no way to iterate over a memory context's children, and ralloc_adopt does this task more efficiently anyway. One potential use of this is to implement a memory-sweeper pass: first, steal all of a context's memory to a temporary context. Then, walk over anything that should be kept, and ralloc_steal it back to the original context. Finally, free the temporary context. This works when the context is something that can't be freed (i.e. an important structure). Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/util/ralloc.c | 26 ++++++++++++++++++++++++++ src/util/ralloc.h | 7 +++++++ 2 files changed, 33 insertions(+) diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 36bc61f..01719c8 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -271,6 +271,32 @@ ralloc_steal(const void *new_ctx, void *ptr) add_child(parent, info); } +void +ralloc_adopt(const void *new_ctx, void *old_ctx) +{ + ralloc_header *new_info, *old_info, *child; + + if (unlikely(old_ctx == NULL)) + return; + + old_info = get_header(old_ctx); + new_info = get_header(new_ctx); + + /* If there are no children, bail. */ + if (unlikely(old_info->child == NULL)) + return; + + /* Set all the children's parent to new_ctx; get a pointer to the last child. */ + for (child = old_info->child; child->next != NULL; child = child->next) { + child->parent = new_info; + } + + /* Connect the two lists together; parent them to new_ctx; make old_ctx empty. 
*/ + child->next = new_info->child; + new_info->child = old_info->child; + old_info->child = NULL; +} + void * ralloc_parent(const void *ptr) { diff --git a/src/util/ralloc.h b/src/util/ralloc.h index f088a36..01f102b 100644 --- a/src/util/ralloc.h +++ b/src/util/ralloc.h @@ -235,6 +235,13 @@ void ralloc_free(void *ptr); void ralloc_steal(const void *new_ctx, void *ptr); /** + * Reparent all children from one context to another. + * + * This effectively calls ralloc_steal(new_ctx, child) for all children of \p old_ctx. + */ +void ralloc_adopt(const void *new_ctx, void *old_ctx); + +/** * Return the given pointer's ralloc context. */ void *ralloc_parent(const void *ptr); From kwg at kemper.freedesktop.org Thu Apr 2 22:26:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:13 -0700 (PDT) Subject: Mesa (master): nir: Combine remove_dead_local_vars() and remove_dead_global_vars(). Message-ID: <20150402222613.DE09076338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f4e449108060dcaea1b4e1e445b76a8ef43d3a05 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f4e449108060dcaea1b4e1e445b76a8ef43d3a05 Author: Kenneth Graunke Date: Fri Mar 27 16:17:20 2015 -0700 nir: Combine remove_dead_local_vars() and remove_dead_global_vars(). We can just pass a pointer to the list of variables, and reuse the code. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_remove_dead_variables.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c index e7f8aea..baa321e 100644 --- a/src/glsl/nir/nir_remove_dead_variables.c +++ b/src/glsl/nir/nir_remove_dead_variables.c @@ -98,19 +98,9 @@ add_var_use_shader(nir_shader *shader, struct set *live) } static void -remove_dead_local_vars(nir_function_impl *impl, struct set *live) +remove_dead_vars(struct exec_list *var_list, struct set *live) { - foreach_list_typed_safe(nir_variable, var, node, &impl->locals) { - struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) - exec_node_remove(&var->node); - } -} - -static void -remove_dead_global_vars(nir_shader *shader, struct set *live) -{ - foreach_list_typed_safe(nir_variable, var, node, &shader->globals) { + foreach_list_typed_safe(nir_variable, var, node, var_list) { struct set_entry *entry = _mesa_set_search(live, var); if (entry == NULL) exec_node_remove(&var->node); @@ -125,11 +115,11 @@ nir_remove_dead_variables(nir_shader *shader) add_var_use_shader(shader, live); - remove_dead_global_vars(shader, live); + remove_dead_vars(&shader->globals, live); nir_foreach_overload(shader, overload) { if (overload->impl) - remove_dead_local_vars(overload->impl, live); + remove_dead_vars(&overload->impl->locals, live); } _mesa_set_destroy(live, NULL); From kwg at kemper.freedesktop.org Thu Apr 2 22:26:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:13 -0700 (PDT) Subject: Mesa (master): nir: Free dead variables when removing them. 
Message-ID: <20150402222613.EBA3176338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f61b6c3e48071991c098aa588ee86473f419d5c0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f61b6c3e48071991c098aa588ee86473f419d5c0 Author: Kenneth Graunke Date: Fri Mar 27 16:19:27 2015 -0700 nir: Free dead variables when removing them. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_remove_dead_variables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c index baa321e..4417e2a 100644 --- a/src/glsl/nir/nir_remove_dead_variables.c +++ b/src/glsl/nir/nir_remove_dead_variables.c @@ -102,8 +102,10 @@ remove_dead_vars(struct exec_list *var_list, struct set *live) { foreach_list_typed_safe(nir_variable, var, node, var_list) { struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) + if (entry == NULL) { exec_node_remove(&var->node); + ralloc_free(var); + } } } From kwg at kemper.freedesktop.org Thu Apr 2 22:26:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:14 -0700 (PDT) Subject: Mesa (master): nir: Allocate register fields out of the register itself. Message-ID: <20150402222614.17B4676338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 131444e1c5e08cbac4694489110ab53c9c07816d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=131444e1c5e08cbac4694489110ab53c9c07816d Author: Kenneth Graunke Date: Fri Mar 27 20:21:59 2015 -0700 nir: Allocate register fields out of the register itself. The lifetime of each register's use/def/if_use sets needs to match the register itself. So, allocate them using the register itself as the context. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 0311d8d..e96f113 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list) nir_register *reg = ralloc(mem_ctx, nir_register); reg->parent_instr = NULL; - reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->defs = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); - reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer, _mesa_key_pointer_equal); reg->num_components = 0; From kwg at kemper.freedesktop.org Thu Apr 2 22:26:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:14 -0700 (PDT) Subject: Mesa (master): nir: Allocate nir_tex_instr::sources out of the instruction itself. Message-ID: <20150402222614.2E8A376338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: da5ec2ac0bc20b52fefe59081efcdb9b3989f6a7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=da5ec2ac0bc20b52fefe59081efcdb9b3989f6a7 Author: Kenneth Graunke Date: Sat Mar 28 00:02:37 2015 -0700 nir: Allocate nir_tex_instr::sources out of the instruction itself. The lifetime of the sources array needs to match the nir_tex_instr itself. So, allocate it using the instruction itself as the context.
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 73d3008..5f86eca 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -460,7 +460,7 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) dest_init(&instr->dest); instr->num_srcs = num_srcs; - instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs); + instr->src = ralloc_array(instr, nir_tex_src, num_srcs); for (unsigned i = 0; i < num_srcs; i++) src_init(&instr->src[i].src); From kwg at kemper.freedesktop.org Thu Apr 2 22:26:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:14 -0700 (PDT) Subject: Mesa (master): nir: Allocate predecessor and dominance frontier sets from block itself. Message-ID: <20150402222614.236A776338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7380c641b116a47d5729c553dcf3ed7143e877cc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7380c641b116a47d5729c553dcf3ed7143e877cc Author: Kenneth Graunke Date: Fri Mar 27 21:29:07 2015 -0700 nir: Allocate predecessor and dominance frontier sets from block itself. These sets are part of the block, and their lifetime needs to match the block itself. So, allocate them using the block itself as the context. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index e96f113..73d3008 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx) cf_init(&block->cf_node, nir_cf_node_block); block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; - block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); exec_list_make_empty(&block->instr_list); From kwg at kemper.freedesktop.org Thu Apr 2 22:26:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 2 Apr 2015 15:26:14 -0700 (PDT) Subject: Mesa (master): nir: Make nir_create_function() strdup the function name. Message-ID: <20150402222614.0A05976338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 587b3a20a1d3201467adf90e66b53b9843b2cc0a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=587b3a20a1d3201467adf90e66b53b9843b2cc0a Author: Kenneth Graunke Date: Fri Mar 27 19:23:36 2015 -0700 nir: Make nir_create_function() strdup the function name. glsl_to_nir passes in the ir_function's name field; we were copying the pointer, but not duplicating the memory. We want to be able to free the linked GLSL IR program after translating to NIR, so we'll need to create a copy of the function name that the NIR shader actually owns. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 6459d51..0311d8d 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name) exec_list_push_tail(&shader->functions, &func->node); exec_list_make_empty(&func->overload_list); - func->name = name; + func->name = ralloc_strdup(func, name); func->shader = shader; return func; From imirkin at kemper.freedesktop.org Thu Apr 2 22:37:45 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Thu, 2 Apr 2015 15:37:45 -0700 (PDT) Subject: Mesa (master): nv50/ir: fix imad emission when dst == src2 Message-ID: <20150402223745.9E59D76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 603d28f32c1083921ea9d54a0a606dd832e44aaa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=603d28f32c1083921ea9d54a0a606dd832e44aaa Author: Ilia Mirkin Date: Thu Apr 2 18:33:55 2015 -0400 nv50/ir: fix imad emission when dst == src2 Commit fb63df22151f added 4-byte mad support, but only supported emission for floats. Disable it for ints for now. 
Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index b1e7409..1bfc8e3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1942,7 +1942,7 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const // check constraints on short MAD if (info.srcNr >= 2 && i->srcExists(2)) { - if (!i->defExists(0) || + if (!i->defExists(0) || !isFloatType(i->dType) || i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) return 8; } From imirkin at kemper.freedesktop.org Thu Apr 2 22:49:39 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Thu, 2 Apr 2015 15:49:39 -0700 (PDT) Subject: Mesa (master): nv50/ir: avoid folding immediates into imad operations Message-ID: <20150402224939.DB35D76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 01d3b750b3682f3774f1bd01fa07a6b3c8baf28e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=01d3b750b3682f3774f1bd01fa07a6b3c8baf28e Author: Ilia Mirkin Date: Thu Apr 2 18:42:31 2015 -0400 nv50/ir: avoid folding immediates into imad operations Commit 09ee907266 added logic to fold immediates into mad operations, but the emission code is only there for fmad. Only allow it on float types. 
Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index dc048e6..87d9589 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2315,7 +2315,8 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb) i->src(0).getFile() != FILE_GPR || i->src(1).getFile() != FILE_GPR || i->src(2).getFile() != FILE_GPR || - i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) + i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id || + !isFloatType(i->dType)) break; def = i->getSrc(1)->getInsn(); From cworth at kemper.freedesktop.org Fri Apr 3 05:19:00 2015 From: cworth at kemper.freedesktop.org (Carl Worth) Date: Thu, 2 Apr 2015 22:19:00 -0700 (PDT) Subject: Mesa (master): i965: Split out per-stage dirty-bit checking into separate functions Message-ID: <20150403051900.CC323760E6@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a57672f18deca3060eab129a77cf84f5e420bac8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a57672f18deca3060eab129a77cf84f5e420bac8 Author: Carl Worth Date: Fri Mar 20 12:01:33 2015 -0700 i965: Split out per-stage dirty-bit checking into separate functions The dirty-bit checking from each brw_upload_<stage>_prog function is split out into its own new brw_<stage>_state_dirty function. This commit is intended to have no functional change. It exists in preparation for some upcoming code movement in preparation for the shader cache.
Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 16 +++++++++----- src/mesa/drivers/dri/i965/brw_gs.c | 16 +++++++++----- src/mesa/drivers/dri/i965/brw_vs.c | 24 +++++++++++++-------- src/mesa/drivers/dri/i965/brw_wm.c | 38 +++++++++++++++++++-------------- 4 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index 14ae4c1..b3cbbe8 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -147,6 +147,16 @@ static void compile_ff_gs_prog(struct brw_context *brw, ralloc_free(mem_ctx); } +static bool +brw_ff_gs_state_dirty(struct brw_context *brw) +{ + return brw_state_dirty(brw, + _NEW_LIGHT, + BRW_NEW_PRIMITIVE | + BRW_NEW_TRANSFORM_FEEDBACK | + BRW_NEW_VS_PROG_DATA); +} + static void brw_ff_gs_populate_key(struct brw_context *brw, struct brw_ff_gs_prog_key *key) @@ -227,11 +237,7 @@ brw_upload_ff_gs_prog(struct brw_context *brw) { struct brw_ff_gs_prog_key key; - if (!brw_state_dirty(brw, - _NEW_LIGHT, - BRW_NEW_PRIMITIVE | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_VS_PROG_DATA)) + if (!brw_ff_gs_state_dirty(brw)) return; /* Populate the key: diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index e233049..6f7f129 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -288,6 +288,16 @@ do_gs_prog(struct brw_context *brw, return true; } +static bool +brw_gs_state_dirty(struct brw_context *brw) +{ + return brw_state_dirty(brw, + _NEW_TEXTURE, + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_TRANSFORM_FEEDBACK | + BRW_NEW_VUE_MAP_VS); +} + static void brw_gs_populate_key(struct brw_context *brw, struct brw_gs_prog_key *key) @@ -322,11 +332,7 @@ brw_upload_gs_prog(struct brw_context *brw) struct brw_geometry_program *gp = (struct brw_geometry_program *) brw->geometry_program; - if (!brw_state_dirty(brw, - _NEW_TEXTURE, - 
BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_VUE_MAP_VS)) + if (!brw_gs_state_dirty(brw)) return; if (gp == NULL) { diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 9069596..2846ff6 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -401,6 +401,20 @@ brw_setup_vue_key_clip_info(struct brw_context *brw, } } +static bool +brw_vs_state_dirty(struct brw_context *brw) +{ + return brw_state_dirty(brw, + _NEW_BUFFERS | + _NEW_LIGHT | + _NEW_POINT | + _NEW_POLYGON | + _NEW_TEXTURE | + _NEW_TRANSFORM, + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VS_ATTRIB_WORKAROUNDS); +} + static void brw_vs_populate_key(struct brw_context *brw, struct brw_vs_prog_key *key) @@ -459,15 +473,7 @@ brw_upload_vs_prog(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_TEXTURE | - _NEW_TRANSFORM, - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_ATTRIB_WORKAROUNDS)) + if (!brw_vs_state_dirty(brw)) return; brw_vs_populate_key(brw, &key); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index a0eda3a8..b85f236 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -421,6 +421,27 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx, } } +static bool +brw_wm_state_dirty (struct brw_context *brw) +{ + return brw_state_dirty(brw, + _NEW_BUFFERS | + _NEW_COLOR | + _NEW_DEPTH | + _NEW_FRAG_CLAMP | + _NEW_HINT | + _NEW_LIGHT | + _NEW_LINE | + _NEW_MULTISAMPLE | + _NEW_POLYGON | + _NEW_STENCIL | + _NEW_TEXTURE, + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_STATS_WM | + BRW_NEW_VUE_MAP_GEOM_OUT); +} + static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { @@ -590,22 +611,7 @@ brw_upload_wm_prog(struct brw_context *brw) struct 
brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_COLOR | - _NEW_DEPTH | - _NEW_FRAG_CLAMP | - _NEW_HINT | - _NEW_LIGHT | - _NEW_LINE | - _NEW_MULTISAMPLE | - _NEW_POLYGON | - _NEW_STENCIL | - _NEW_TEXTURE, - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_REDUCED_PRIMITIVE | - BRW_NEW_STATS_WM | - BRW_NEW_VUE_MAP_GEOM_OUT)) + if (!brw_wm_state_dirty(brw)) return; brw_wm_populate_key(brw, &key); From cworth at kemper.freedesktop.org Fri Apr 3 05:19:00 2015 From: cworth at kemper.freedesktop.org (Carl Worth) Date: Thu, 2 Apr 2015 22:19:00 -0700 (PDT) Subject: Mesa (master): i965: Rename do_<stage>_prog to brw_compile_<stage>_prog (and export) Message-ID: <20150403051900.DD6D57635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b9b66985c3d33fa0db2b49c0e0231aa6d341e183 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b9b66985c3d33fa0db2b49c0e0231aa6d341e183 Author: Carl Worth Date: Fri Mar 20 12:10:49 2015 -0700 i965: Rename do_<stage>_prog to brw_compile_<stage>_prog (and export) This is in preparation for these functions to be called from other files. This commit is intended to have no functional change. It exists in preparation for some upcoming code movement in preparation for the shader cache.
Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 7 ++++--- src/mesa/drivers/dri/i965/brw_ff_gs.h | 4 ++++ src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_gs.c | 18 +++++++++--------- src/mesa/drivers/dri/i965/brw_gs.h | 7 +++++++ src/mesa/drivers/dri/i965/brw_vs.c | 18 +++++++++--------- src/mesa/drivers/dri/i965/brw_vs.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_wm.c | 13 +++++++------ src/mesa/drivers/dri/i965/brw_wm.h | 8 ++++---- 9 files changed, 51 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index b3cbbe8..e6f837c 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -45,8 +45,9 @@ #include "util/ralloc.h" -static void compile_ff_gs_prog(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) +void +brw_compile_ff_gs_prog(struct brw_context *brw, + struct brw_ff_gs_prog_key *key) { struct brw_ff_gs_compile c; const GLuint *program; @@ -253,7 +254,7 @@ brw_upload_ff_gs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_CACHE_FF_GS_PROG, &key, sizeof(key), &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data)) { - compile_ff_gs_prog( brw, &key ); + brw_compile_ff_gs_prog(brw, &key); } } } diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.h b/src/mesa/drivers/dri/i965/brw_ff_gs.h index e4afdab..3dd045f 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.h +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.h @@ -115,4 +115,8 @@ void gen6_brw_upload_ff_gs_prog(struct brw_context *brw); void brw_upload_ff_gs_prog(struct brw_context *brw); +void +brw_compile_ff_gs_prog(struct brw_context *brw, + struct brw_ff_gs_prog_key *key); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9c2ccce..f30176c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4152,7 +4152,7 @@ 
brw_fs_precompile(struct gl_context *ctx, uint32_t old_prog_offset = brw->wm.base.prog_offset; struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data; - bool success = do_wm_prog(brw, shader_prog, bfp, &key); + bool success = brw_compile_wm_prog(brw, shader_prog, bfp, &key); brw->wm.base.prog_offset = old_prog_offset; brw->wm.prog_data = old_prog_data; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 6f7f129..bea90d8 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -34,11 +34,11 @@ #include "brw_ff_gs.h" -static bool -do_gs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_geometry_program *gp, - struct brw_gs_prog_key *key) +bool +brw_compile_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) { struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_compile c; @@ -326,6 +326,7 @@ void brw_upload_gs_prog(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + struct gl_shader_program **current = ctx->_Shader->CurrentProgram; struct brw_stage_state *stage_state = &brw->gs.base; struct brw_gs_prog_key key; /* BRW_NEW_GEOMETRY_PROGRAM */ @@ -362,9 +363,8 @@ brw_upload_gs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key), &stage_state->prog_offset, &brw->gs.prog_data)) { - bool success = - do_gs_prog(brw, ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY], gp, - &key); + bool success = brw_compile_gs_prog(brw, current[MESA_SHADER_GEOMETRY], + gp, &key); assert(success); (void)success; } @@ -400,7 +400,7 @@ brw_gs_precompile(struct gl_context *ctx, */ key.input_varyings = gp->Base.InputsRead; - success = do_gs_prog(brw, shader_prog, bgp, &key); + success = brw_compile_gs_prog(brw, shader_prog, bgp, &key); brw->gs.base.prog_offset = old_prog_offset; brw->gs.prog_data = old_prog_data; diff --git 
a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 5f7c437..16dafcc 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -27,6 +27,7 @@ #include #include "brw_context.h" +#include "brw_program.h" #ifdef __cplusplus extern "C" { @@ -41,6 +42,12 @@ bool brw_gs_prog_data_compare(const void *a, const void *b); void brw_upload_gs_prog(struct brw_context *brw); +bool +brw_compile_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 2846ff6..dabff43 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -187,11 +187,11 @@ brw_vs_prog_data_compare(const void *in_a, const void *in_b) return true; } -static bool -do_vs_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_vertex_program *vp, - struct brw_vs_prog_key *key) +bool +brw_compile_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) { GLuint program_size; const GLuint *program; @@ -468,6 +468,7 @@ void brw_upload_vs_prog(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + struct gl_shader_program **current = ctx->_Shader->CurrentProgram; struct brw_vs_prog_key key; /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = @@ -481,9 +482,8 @@ brw_upload_vs_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key), &brw->vs.base.prog_offset, &brw->vs.prog_data)) { - bool success = - do_vs_prog(brw, ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX], vp, - &key); + bool success = brw_compile_vs_prog(brw, current[MESA_SHADER_VERTEX], + vp, &key); (void) success; assert(success); } @@ -524,7 +524,7 @@ brw_vs_precompile(struct gl_context *ctx, 
(prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | VARYING_BIT_BFC1)); - success = do_vs_prog(brw, shader_prog, bvp, &key); + success = brw_compile_vs_prog(brw, shader_prog, bvp, &key); brw->vs.base.prog_offset = old_prog_offset; brw->vs.prog_data = old_prog_data; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index bad0f07..f47ecaa 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,6 +75,12 @@ bool brw_vs_prog_data_compare(const void *a, const void *b); void brw_upload_vs_prog(struct brw_context *brw); +bool +brw_compile_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key); + #ifdef __cplusplus } /* extern "C" */ diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index b85f236..308eebe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -159,10 +159,11 @@ brw_wm_prog_data_compare(const void *in_a, const void *in_b) * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. 
*/ -bool do_wm_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key) +bool +brw_compile_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) { struct gl_context *ctx = &brw->ctx; void *mem_ctx = ralloc_context(NULL); @@ -607,6 +608,7 @@ void brw_upload_wm_prog(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; + struct gl_shader_program *current = ctx->_Shader->_CurrentFragmentProgram; struct brw_wm_prog_key key; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; @@ -619,8 +621,7 @@ brw_upload_wm_prog(struct brw_context *brw) if (!brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, &key, sizeof(key), &brw->wm.base.prog_offset, &brw->wm.prog_data)) { - bool success = do_wm_prog(brw, ctx->_Shader->_CurrentFragmentProgram, fp, - &key); + bool success = brw_compile_wm_prog(brw, current, fp, &key); (void) success; assert(success); } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 32d1ce3..45fbb75 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -78,10 +78,10 @@ GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type); bool brw_color_buffer_write_enabled(struct brw_context *brw); -bool do_wm_prog(struct brw_context *brw, - struct gl_shader_program *prog, - struct brw_fragment_program *fp, - struct brw_wm_prog_key *key); +bool brw_compile_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key); void brw_wm_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_wm_prog_key *key); From cworth at kemper.freedesktop.org Fri Apr 3 05:19:00 2015 From: cworth at 
kemper.freedesktop.org (Carl Worth) Date: Thu, 2 Apr 2015 22:19:00 -0700 (PDT) Subject: Mesa (master): i965: Split out brw_<stage>_populate_key into their own functions Message-ID: <20150403051900.C3FA176338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 28510d69ff8fc03bc1693be2b7a02bc68791dd2f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=28510d69ff8fc03bc1693be2b7a02bc68791dd2f Author: Carl Worth Date: Mon Feb 23 14:44:39 2015 -0800 i965: Split out brw_<stage>_populate_key into their own functions This commit splits portions of the existing brw_upload_vs_prog and brw_upload_gs_prog functions into new brw_vs_populate_key and brw_gs_populate_key functions. This follows the same style as is already present for all other stages (see brw_wm_populate_key, etc.). This commit is intended to have no functional change. It exists in preparation for some upcoming code movement in preparation for the shader cache. Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_ff_gs.c | 7 ++-- src/mesa/drivers/dri/i965/brw_gs.c | 39 ++++++++++++++-------- src/mesa/drivers/dri/i965/brw_vs.c | 58 ++++++++++++++++++++------------- 3 files changed, 64 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index 016fcdf..14ae4c1 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -147,8 +147,9 @@ static void compile_ff_gs_prog(struct brw_context *brw, ralloc_free(mem_ctx); } -static void populate_key(struct brw_context *brw, - struct brw_ff_gs_prog_key *key) +static void +brw_ff_gs_populate_key(struct brw_context *brw, + struct brw_ff_gs_prog_key *key) { static const unsigned swizzle_for_offset[4] = { BRW_SWIZZLE4(0, 1, 2, 3), @@ -235,7 +236,7 @@ brw_upload_ff_gs_prog(struct brw_context *brw) /* Populate the key: */ - populate_key(brw, &key); + brw_ff_gs_populate_key(brw, &key); if (brw->ff_gs.prog_active != key.need_gs_prog) {
brw->ctx.NewDriverState |= BRW_NEW_FF_GS_PROG_DATA; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index ffe7476..e233049 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -288,6 +288,30 @@ do_gs_prog(struct brw_context *brw, return true; } +static void +brw_gs_populate_key(struct brw_context *brw, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct brw_geometry_program *gp = + (struct brw_geometry_program *) brw->geometry_program; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + + key->base.program_string_id = gp->id; + brw_setup_vue_key_clip_info(brw, &key->base, + gp->program.Base.UsesClipDistanceOut); + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key->base.tex); + + /* BRW_NEW_VUE_MAP_VS */ + key->input_varyings = brw->vue_map_vs.slots_valid; +} + void brw_upload_gs_prog(struct brw_context *brw) { @@ -327,20 +351,7 @@ brw_upload_gs_prog(struct brw_context *brw) return; } - struct gl_program *prog = &gp->program.Base; - - memset(&key, 0, sizeof(key)); - - key.base.program_string_id = gp->id; - brw_setup_vue_key_clip_info(brw, &key.base, - gp->program.Base.UsesClipDistanceOut); - - /* _NEW_TEXTURE */ - brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, - &key.base.tex); - - /* BRW_NEW_VUE_MAP_VS */ - key.input_varyings = brw->vue_map_vs.slots_valid; + brw_gs_populate_key(brw, &key); if (!brw_search_cache(&brw->cache, BRW_CACHE_GS_PROG, &key, sizeof(key), diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index bf16f34..9069596 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -401,64 +401,76 @@ brw_setup_vue_key_clip_info(struct brw_context *brw, } } -void -brw_upload_vs_prog(struct brw_context *brw) +static void 
+brw_vs_populate_key(struct brw_context *brw, + struct brw_vs_prog_key *key) { struct gl_context *ctx = &brw->ctx; - struct brw_vs_prog_key key; /* BRW_NEW_VERTEX_PROGRAM */ struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct gl_program *prog = (struct gl_program *) brw->vertex_program; int i; - if (!brw_state_dirty(brw, - _NEW_BUFFERS | - _NEW_LIGHT | - _NEW_POINT | - _NEW_POLYGON | - _NEW_TEXTURE | - _NEW_TRANSFORM, - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VS_ATTRIB_WORKAROUNDS)) - return; - - memset(&key, 0, sizeof(key)); + memset(key, 0, sizeof(*key)); /* Just upload the program verbatim for now. Always send it all * the inputs it asks for, whether they are varying or not. */ - key.base.program_string_id = vp->id; - brw_setup_vue_key_clip_info(brw, &key.base, + key->base.program_string_id = vp->id; + brw_setup_vue_key_clip_info(brw, &key->base, vp->program.Base.UsesClipDistanceOut); /* _NEW_POLYGON */ if (brw->gen < 6) { - key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); } if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { /* _NEW_LIGHT | _NEW_BUFFERS */ - key.clamp_vertex_color = ctx->Light._ClampVertexColor; + key->clamp_vertex_color = ctx->Light._ClampVertexColor; } /* _NEW_POINT */ if (brw->gen < 6 && ctx->Point.PointSprite) { for (i = 0; i < 8; i++) { if (ctx->Point.CoordReplace[i]) - key.point_coord_replace |= (1 << i); + key->point_coord_replace |= (1 << i); } } /* _NEW_TEXTURE */ brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, - &key.base.tex); + &key->base.tex); /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */ - memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags, + memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags, sizeof(brw->vb.attrib_wa_flags)); +} + +void +brw_upload_vs_prog(struct brw_context 
*brw) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_vs_prog_key key; + /* BRW_NEW_VERTEX_PROGRAM */ + struct brw_vertex_program *vp = + (struct brw_vertex_program *)brw->vertex_program; + + if (!brw_state_dirty(brw, + _NEW_BUFFERS | + _NEW_LIGHT | + _NEW_POINT | + _NEW_POLYGON | + _NEW_TEXTURE | + _NEW_TRANSFORM, + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_VS_ATTRIB_WORKAROUNDS)) + return; + + brw_vs_populate_key(brw, &key); if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, &key, sizeof(key), From sroland at kemper.freedesktop.org Fri Apr 3 17:22:29 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Fri, 3 Apr 2015 10:22:29 -0700 (PDT) Subject: Mesa (master): gallium: fix gcc compile errors when using _XOPEN_SOURCE= 600 but not std=c99 Message-ID: <20150403172229.449C276331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 328375d2742a3394dc481fc5fae6c4a5d797b4e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=328375d2742a3394dc481fc5fae6c4a5d797b4e7 Author: Jose Fonseca Date: Thu Apr 2 17:35:16 2015 +0200 gallium: fix gcc compile errors when using _XOPEN_SOURCE=600 but not std=c99 The fpclassify stuff either needs std=c99 or _XOPEN_SOURCE=600 passed to gcc, but when using the latter the lrint family of function will be defined too. 
--- include/c99_math.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/c99_math.h b/include/c99_math.h index ee0dd10..7ed7cc2 100644 --- a/include/c99_math.h +++ b/include/c99_math.h @@ -83,7 +83,11 @@ roundf(float x) #if (defined(_MSC_VER) && _MSC_VER < 1800) || \ - (!defined(_MSC_VER) && __STDC_VERSION__ < 199901L && !defined(__cplusplus)) + (!defined(_MSC_VER) && \ + __STDC_VERSION__ < 199901L && \ + (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600) && \ + !defined(__cplusplus)) + static inline long int lrint(double d) { @@ -135,6 +139,7 @@ llrintf(float f) return rounded; } + #endif /* C99 */ From anholt at kemper.freedesktop.org Fri Apr 3 18:53:30 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Fri, 3 Apr 2015 11:53:30 -0700 (PDT) Subject: Mesa (master): nir: Add an interface for the builder to insert instructions before. Message-ID: <20150403185330.C2EC276331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ec029702054ddc4e098ebb96e76c7451190d649f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec029702054ddc4e098ebb96e76c7451190d649f Author: Eric Anholt Date: Fri Mar 27 14:18:54 2015 -0700 nir: Add an interface for the builder to insert instructions before. So far we'd only used nir_builder to build brand new programs. But if we're doing modifications to instructions (like in a lowering pass), then we want to generate new stuff before the instruction we're modifying. 
Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_builder.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 6459e9a..ecbdbe3 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -28,6 +28,8 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; + nir_instr *before_instr; + nir_shader *shader; nir_function_impl *impl; } nir_builder; @@ -47,6 +49,23 @@ nir_builder_insert_after_cf_list(nir_builder *build, build->cf_node_list = cf_node_list; } +static inline void +nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) +{ + build->before_instr = before_instr; +} + +static inline void +nir_builder_instr_insert(nir_builder *build, nir_instr *instr) +{ + if (build->cf_node_list) { + nir_instr_insert_after_cf_list(build->cf_node_list, instr); + } else { + assert(build->before_instr); + nir_instr_insert_before(build->before_instr, instr); + } +} + static inline nir_ssa_def * nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) { @@ -57,7 +76,7 @@ nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value load_const->value = value; - nir_instr_insert_after_cf_list(build->cf_node_list, &load_const->instr); + nir_builder_instr_insert(build, &load_const->instr); return &load_const->def; } @@ -125,7 +144,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); instr->dest.write_mask = (1 << num_components) - 1; - nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr); + nir_builder_instr_insert(build, &instr->instr); return &instr->dest.dest.ssa; } @@ -172,7 +191,7 @@ nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); mov->dest.write_mask = (1 << num_components) 
- 1; mov->src[0] = src; - nir_instr_insert_after_cf_list(build->cf_node_list, &mov->instr); + nir_builder_instr_insert(build, &mov->instr); return &mov->dest.dest.ssa; } @@ -184,7 +203,7 @@ nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); mov->dest.write_mask = (1 << num_components) - 1; mov->src[0] = src; - nir_instr_insert_after_cf_list(build->cf_node_list, &mov->instr); + nir_builder_instr_insert(build, &mov->instr); return &mov->dest.dest.ssa; } From anholt at kemper.freedesktop.org Fri Apr 3 18:53:30 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Fri, 3 Apr 2015 11:53:30 -0700 (PDT) Subject: Mesa (master): nir: Add an interface to turn a nir_src into a nir_ssa_def. Message-ID: <20150403185330.C7B507635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 64bdfc698d6d9f543f82141330ae32de286b8417 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64bdfc698d6d9f543f82141330ae32de286b8417 Author: Eric Anholt Date: Fri Mar 27 14:19:46 2015 -0700 nir: Add an interface to turn a nir_src into a nir_ssa_def. We use nir_ssa_defs for nir_builder args, so this takes a nir_src and makes one so it can be passed in. Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_builder.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index ecbdbe3..587d014 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -225,4 +225,23 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], nir_imov_alu(build, alu_src, num_components); } +/** + * Turns a nir_src into a nir_ssa_def * so it can be passed to + * nir_build_alu()-based builder calls. 
+ */ +static inline nir_ssa_def * +nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) +{ + if (src.is_ssa && src.ssa->num_components == num_components) + return src.ssa; + + nir_alu_src alu; + memset(&alu, 0, sizeof(alu)); + alu.src = src; + for (int j = 0; j < 4; j++) + alu.swizzle[j] = j; + + return nir_imov_alu(build, alu, num_components); +} + #endif /* NIR_BUILDER_H */ From anholt at kemper.freedesktop.org Fri Apr 3 18:53:30 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Fri, 3 Apr 2015 11:53:30 -0700 (PDT) Subject: Mesa (master): nir: Add a lowering pass for texture projectors. Message-ID: <20150403185330.D179376331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ea811b7868d4039499dddf53c109cf0b9da98967 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea811b7868d4039499dddf53c109cf0b9da98967 Author: Eric Anholt Date: Fri Mar 27 14:18:11 2015 -0700 nir: Add a lowering pass for texture projectors. Not much hardware wants them these days, and it might give us a chance to do CSE or algebraic at the NIR level. 
Reviewed-by: Jason Ekstrand --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 1 + src/glsl/nir/nir_lower_tex_projector.c | 142 ++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index b56fa26..ffce706 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -37,6 +37,7 @@ NIR_FILES = \ nir/nir_lower_phis_to_scalar.c \ nir/nir_lower_samplers.cpp \ nir/nir_lower_system_values.c \ + nir/nir_lower_tex_projector.c \ nir/nir_lower_to_source_mods.c \ nir/nir_lower_vars_to_ssa.c \ nir/nir_lower_var_copies.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 24deb82..6e2aa97 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1601,6 +1601,7 @@ void nir_lower_samplers(nir_shader *shader, struct gl_program *prog); void nir_lower_system_values(nir_shader *shader); +void nir_lower_tex_projector(nir_shader *shader); void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c new file mode 100644 index 0000000..6327b23 --- /dev/null +++ b/src/glsl/nir/nir_lower_tex_projector.c @@ -0,0 +1,142 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * This lowering pass converts the coordinate division for texture projection + * to be done in ALU instructions instead of asking the texture operation to + * do so. + */ + +#include "nir.h" +#include "nir_builder.h" + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +nir_lower_tex_projector_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_builder_insert_before_instr(b, &tex->instr); + + /* Find the projector in the srcs list, if present. */ + int proj_index; + for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) { + if (tex->src[proj_index].src_type == nir_tex_src_projector) + break; + } + if (proj_index == tex->num_srcs) + continue; + nir_ssa_def *inv_proj = + nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); + + /* Walk through the sources projecting the arguments. 
*/ + for (int i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + case nir_tex_src_comparitor: + break; + default: + continue; + } + nir_ssa_def *unprojected = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); + + /* Array indices don't get projected, so make an new vector with the + * coordinate's array index untouched. + */ + if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { + switch (tex->coord_components) { + case 4: + projected = nir_vec4(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, projected, 2), + channel(b, unprojected, 3)); + break; + case 3: + projected = nir_vec3(b, + channel(b, projected, 0), + channel(b, projected, 1), + channel(b, unprojected, 2)); + break; + case 2: + projected = nir_vec2(b, + channel(b, projected, 0), + channel(b, unprojected, 1)); + break; + default: + unreachable("bad texture coord count for array"); + break; + } + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(projected)); + } + + /* Now move the later tex sources down the array so that the projector + * disappears. 
+ */ + nir_src dead = {.is_ssa = false, .ssa = NULL}; + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); + memmove(&tex->src[proj_index], + &tex->src[proj_index + 1], + (tex->num_srcs - proj_index) * sizeof(*tex->src)); + tex->num_srcs--; + } + + return true; +} + +static void +nir_lower_tex_projector_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, nir_lower_tex_projector_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_tex_projector(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) + nir_lower_tex_projector_impl(overload->impl); + } +} From anholt at kemper.freedesktop.org Fri Apr 3 18:53:30 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Fri, 3 Apr 2015 11:53:30 -0700 (PDT) Subject: Mesa (master): i965: Use the tex projector lowering pass instead of hand-rolling it. Message-ID: <20150403185330.DAE8876331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cb966fb2bea77b1d7b1bdb6597b7b85d810f2d0a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb966fb2bea77b1d7b1bdb6597b7b85d810f2d0a Author: Eric Anholt Date: Wed Apr 1 11:38:53 2015 -0700 i965: Use the tex projector lowering pass instead of hand-rolling it. This only impacts the ARB_fp path. We can't quite disable the GLSL-level lowering pass, because it needs to apply before brw_do_lower_unnormalized_offset(). 
total instructions in shared programs: 5667857 -> 5667847 (-0.00%) instructions in affected programs: 1114 -> 1104 (-0.90%) helped: 16 HURT: 6 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4dfb4d6..0f1659d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -101,6 +101,9 @@ fs_visitor::emit_nir_code() nir_lower_global_vars_to_local(nir); nir_validate_shader(nir); + nir_lower_tex_projector(nir); + nir_validate_shader(nir); + nir_split_var_copies(nir); nir_validate_shader(nir); @@ -1782,7 +1785,6 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) int lod_components = 0, offset_components = 0; fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset; - fs_reg projector; for (unsigned i = 0; i < instr->num_srcs; i++) { fs_reg src = get_nir_src(instr->src[i].src); @@ -1835,8 +1837,7 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) offset_components = instr->coord_components; break; case nir_tex_src_projector: - projector = retype(src, BRW_REGISTER_TYPE_F); - break; + unreachable("should be lowered"); case nir_tex_src_sampler_offset: { /* Figure out the highest possible sampler index and mark it as used */ @@ -1860,13 +1861,6 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) } } - if (projector.file != BAD_FILE) { - fs_reg invproj = vgrf(glsl_type::float_type); - emit_math(SHADER_OPCODE_RCP, invproj, projector); - for (int i = 0; i < 3; i++) - emit(MUL(offset(coordinate, i), offset(coordinate, i), invproj)); - } - if (instr->op == nir_texop_txf_ms) { if (brw->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler)) { From jekstrand at kemper.freedesktop.org Fri Apr 3 21:36:54 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 14:36:54 -0700 
(PDT) Subject: Mesa (master): nir: Add a src_get_parent_instr function Message-ID: <20150403213654.0F89676331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7bdba4a24583f4cc2f9bca94f975cec9d384cddc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7bdba4a24583f4cc2f9bca94f975cec9d384cddc Author: Jason Ekstrand Date: Thu Apr 2 11:07:48 2015 -0700 nir: Add a src_get_parent_instr function Reviewed-by: Jordan Justen --- src/glsl/nir/nir.h | 10 ++++++++++ .../dri/i965/brw_nir_analyze_boolean_resolves.c | 16 ++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 6e2aa97..0509e64 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -529,6 +529,16 @@ nir_src_for_reg(nir_register *reg) return src; } +static inline nir_instr * +nir_src_get_parent_instr(const nir_src *src) +{ + if (src->is_ssa) { + return src->ssa->parent_instr; + } else { + return src->reg.reg->parent_instr; + } +} + static inline nir_dest nir_dest_for_reg(nir_register *reg) { diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c index 3a27cf1..f0b018c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c +++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c @@ -43,13 +43,7 @@ static uint8_t get_resolve_status_for_src(nir_src *src) { - nir_instr *src_instr; - if (src->is_ssa) { - src_instr = src->ssa->parent_instr; - } else { - src_instr = src->reg.reg->parent_instr; - } - + nir_instr *src_instr = nir_src_get_parent_instr(src); if (src_instr) { uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; @@ -72,13 +66,7 @@ get_resolve_status_for_src(nir_src *src) static bool src_mark_needs_resolve(nir_src *src, void *void_state) { - nir_instr *src_instr; - if (src->is_ssa) { - src_instr = src->ssa->parent_instr; - } else { - src_instr = src->reg.reg->parent_instr; - } - + nir_instr 
*src_instr = nir_src_get_parent_instr(src); if (src_instr) { uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; From jekstrand at kemper.freedesktop.org Fri Apr 3 21:36:54 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 14:36:54 -0700 (PDT) Subject: Mesa (master): nir/from_ssa: Don't set reg-> parent_instr for ssa_undef instructions Message-ID: <20150403213654.19BCC7635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dccc57eaba5e5920a6c151e7abb386a636ebf861 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dccc57eaba5e5920a6c151e7abb386a636ebf861 Author: Jason Ekstrand Date: Thu Apr 2 12:32:39 2015 -0700 nir/from_ssa: Don't set reg->parent_instr for ssa_undef instructions Reviewed-by: Jordan Justen --- src/glsl/nir/nir_from_ssa.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c index c3090fb..184698a 100644 --- a/src/glsl/nir/nir_from_ssa.c +++ b/src/glsl/nir/nir_from_ssa.c @@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state) reg->num_components = def->num_components; reg->num_array_elems = 0; - /* This register comes from an SSA definition that was not part of a - * phi-web. Therefore, we know it has a single unique definition - * that dominates all of its uses. Therefore, we can copy the + /* This register comes from an SSA definition that is defined and not + * part of a phi-web. Therefore, we know it has a single unique + * definition that dominates all of its uses; we can copy the * parent_instr from the SSA def safely. 
*/ - reg->parent_instr = def->parent_instr; + if (def->parent_instr->type != nir_instr_type_ssa_undef) + reg->parent_instr = def->parent_instr; _mesa_hash_table_insert(state->ssa_table, def, reg); return reg; From jekstrand at kemper.freedesktop.org Fri Apr 3 21:36:54 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 14:36:54 -0700 (PDT) Subject: Mesa (master): i965: Check the INTEL_USE_NIR environment variable once at context creation Message-ID: <20150403213654.2853076331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bff421332661bfd0f82ab9eee9e4fec9d06ed1a1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bff421332661bfd0f82ab9eee9e4fec9d06ed1a1 Author: Jason Ekstrand Date: Thu Apr 2 17:02:43 2015 -0700 i965: Check the INTEL_USE_NIR environment variable once at context creation Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_context.c | 10 +++++++++- src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 +++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 84818f0..f0de711 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -560,6 +560,12 @@ brw_initialize_context_constants(struct brw_context *brw) .lower_ffma = true, }; + bool use_nir_default[MESA_SHADER_STAGES]; + use_nir_default[MESA_SHADER_VERTEX] = false; + use_nir_default[MESA_SHADER_GEOMETRY] = false; + use_nir_default[MESA_SHADER_FRAGMENT] = false; + use_nir_default[MESA_SHADER_COMPUTE] = false; + /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 
16 : UINT_MAX; @@ -573,7 +579,9 @@ brw_initialize_context_constants(struct brw_context *brw) (i == MESA_SHADER_FRAGMENT); ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; - ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options; + + if (brw_env_var_as_boolean("INTEL_USE_NIR", use_nir_default[i])) + ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options; } ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f30176c..1f8febc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3861,7 +3861,7 @@ fs_visitor::run_vs() if (INTEL_DEBUG & DEBUG_SHADER_TIME) emit_shader_time_begin(); - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { emit_nir_code(); } else { foreach_in_list(ir_instruction, ir, shader->base.ir) { @@ -3934,7 +3934,7 @@ fs_visitor::run_fs() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). 
*/ - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) { + if (brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { emit_nir_code(); } else if (shader) { foreach_in_list(ir_instruction, ir, shader->base.ir) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 480e50c..ef2fd40 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1823,7 +1823,9 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - if (brw->scalar_vs && (prog || brw_env_var_as_boolean("INTEL_USE_NIR", false))) { + if (brw->scalar_vs && + (prog || + brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions)) { fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8); if (!v.run_vs()) { if (prog) { From jekstrand at kemper.freedesktop.org Fri Apr 3 21:36:54 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 14:36:54 -0700 (PDT) Subject: Mesa (master): nir: Add a cubemap normalizing pass Message-ID: <20150403213654.347A276331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 52e718097fa7dc808b3e8b43a8fd14a5b2b64797 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52e718097fa7dc808b3e8b43a8fd14a5b2b64797 Author: Jason Ekstrand Date: Thu Apr 2 16:38:30 2015 -0700 nir: Add a cubemap normalizing pass This commit adds a pass to L1-normalize cube-map coordinates. Some hardware such as i965 requires that largest cube-map coordinate is +-1. We had a pass to perform this normalization in GLSL IR but we need it in NIR for cube maps on ARB programs to work correctly. 
Reviewed-by: Jordan Justen v2 (Suggested by Eric): - Do a vector fabs and split into components later - Move to core NIR Reviewed-by: Eric Anholt --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_normalize_cubemap_coords.c | 110 +++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index ffce706..c3b70d4 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -43,6 +43,7 @@ NIR_FILES = \ nir/nir_lower_var_copies.c \ nir/nir_lower_vec_to_movs.c \ nir/nir_metadata.c \ + nir/nir_normalize_cubemap_coords.c \ nir/nir_opt_constant_folding.c \ nir/nir_opt_copy_propagate.c \ nir/nir_opt_cse.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 0509e64..199ecc0 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1616,6 +1616,8 @@ void nir_lower_tex_projector(nir_shader *shader); void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); +void nir_normalize_cubemap_coords(nir_shader *shader); + void nir_live_variables_impl(nir_function_impl *impl); bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c new file mode 100644 index 0000000..0da8447 --- /dev/null +++ b/src/glsl/nir/nir_normalize_cubemap_coords.c @@ -0,0 +1,110 @@ +/* + * Copyright © 
2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * This file implements a NIR lowering pass to perform the normalization of + * the cubemap coordinates to have the largest magnitude component be -1.0 + * or 1.0. This is based on the old GLSL IR based pass by Eric. 
+ */ + +static nir_ssa_def * +channel(nir_builder *b, nir_ssa_def *def, int c) +{ + return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false); +} + +static bool +normalize_cubemap_coords_block(nir_block *block, void *void_state) +{ + nir_builder *b = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + continue; + + nir_builder_insert_before_instr(b, &tex->instr); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *orig_coord = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + assert(orig_coord->num_components >= 3); + + nir_ssa_def *abs = nir_fabs(b, orig_coord); + nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0), + nir_fmax(b, channel(b, abs, 1), + channel(b, abs, 2))); + + nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); + + /* Array indices don't have to be normalized, so make a new vector + * with the coordinate's array index untouched. 
+ */ + if (tex->coord_components == 4) { + normalized = nir_vec4(b, + channel(b, normalized, 0), + channel(b, normalized, 1), + channel(b, normalized, 2), + channel(b, orig_coord, 3)); + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(normalized)); + } + } + + return true; +} + +static void +normalize_cubemap_coords_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, normalize_cubemap_coords_block, &b); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_normalize_cubemap_coords(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) + if (overload->impl) + normalize_cubemap_coords_impl(overload->impl); +} From jekstrand at kemper.freedesktop.org Fri Apr 3 21:36:54 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 14:36:54 -0700 (PDT) Subject: Mesa (master): i965: Use brw_nir_cubemap_normalize for NIR shaders Message-ID: <20150403213654.409CA76331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1bd1fc248ce5ecc6882309ab64ec61835fea1eda URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1bd1fc248ce5ecc6882309ab64ec61835fea1eda Author: Jason Ekstrand Date: Thu Apr 2 17:04:28 2015 -0700 i965: Use brw_nir_cubemap_normalize for NIR shaders Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 +++ src/mesa/drivers/dri/i965/brw_shader.cpp | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 0f1659d..e9692aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -104,6 +104,9 @@ fs_visitor::emit_nir_code() nir_lower_tex_projector(nir); nir_validate_shader(nir); + nir_normalize_cubemap_coords(nir); + nir_validate_shader(nir); + nir_split_var_copies(nir); nir_validate_shader(nir); diff --git 
a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0dda9bb..4ef3882 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -173,7 +173,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_lower_texture_gradients(brw, shader->base.ir); do_vec_index_to_cond_assign(shader->base.ir); lower_vector_insert(shader->base.ir, true); - brw_do_cubemap_normalize(shader->base.ir); + if (options->NirOptions == NULL) + brw_do_cubemap_normalize(shader->base.ir); lower_offset_arrays(shader->base.ir); brw_do_lower_unnormalized_offset(shader->base.ir); lower_noise(shader->base.ir); From jekstrand at kemper.freedesktop.org Sat Apr 4 00:03:05 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 3 Apr 2015 17:03:05 -0700 (PDT) Subject: Mesa (master): nir/lower_samplers: Use the right memory context for realloc'ing tex sources Message-ID: <20150404000305.AD7A676331@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9c53e80b9b6a637a771328bac98d2292a00869ce URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c53e80b9b6a637a771328bac98d2292a00869ce Author: Jason Ekstrand Date: Fri Apr 3 15:23:20 2015 -0700 nir/lower_samplers: Use the right memory context for realloc'ing tex sources As of da5ec2a, we allocate instruction sources out of the instruction itself. 
When we realloc the texture sources we need to use the right memory context or ralloc will get angry and assert-fail Reviewed-by: Kenneth Graunke --- src/glsl/nir/nir_lower_samplers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 3015dbd..1e509a9 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -90,7 +90,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset); break; case nir_deref_array_type_indirect: { - instr->src = reralloc(mem_ctx, instr->src, nir_tex_src, + instr->src = reralloc(instr, instr->src, nir_tex_src, instr->num_srcs + 1); memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src); instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; From imirkin at kemper.freedesktop.org Sat Apr 4 15:31:54 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 4 Apr 2015 08:31:54 -0700 (PDT) Subject: Mesa (master): nv50: allocate more offset space for occlusion queries Message-ID: <20150404153154.7F90B76333@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ba353935a392d2a43422f1d258456336b40b60ea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba353935a392d2a43422f1d258456336b40b60ea Author: Ilia Mirkin Date: Fri Apr 3 23:57:43 2015 -0400 nv50: allocate more offset space for occlusion queries Commit 1a170980a09 started writing to q->data[4]/[5] but kept the per-query space at 16, which meant that in some cases we would write past the end of the buffer. Rotate by 32, like nvc0 does. This ensures that we always have 32 bytes in front of us, and the data writes will go within the allocated space. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89679 Signed-off-by: Ilia Mirkin Tested-by: Nick Tenney Reviewed-by: Samuel Pitoiset Reviewed-by: Tobias Klausmann Cc: "10.4 10.5" --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index e81ac5a..23b6d1e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -41,7 +41,7 @@ struct nv50_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base; - uint32_t offset; /* base + i * 16 */ + uint32_t offset; /* base + i * 32 */ boolean ready; boolean flushed; boolean is64bit; @@ -116,8 +116,8 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->type = type; if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 16; - q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + q->offset -= 32; + q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ } return (struct pipe_query *)q; @@ -150,8 +150,8 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) * initialized it to TRUE. */ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset += 16; - q->data += 16 / sizeof(*q->data); + q->offset += 32; + q->data += 32 / sizeof(*q->data); if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); From frohlich at kemper.freedesktop.org Sun Apr 5 06:14:28 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sat, 4 Apr 2015 23:14:28 -0700 (PDT) Subject: Mesa (master): radeon: Make use of _mesa_get_viewport_xform v2. 
Message-ID: <20150405061428.AA1AF7635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 472913ea7563e136b9ad3d33111925147a044a39 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=472913ea7563e136b9ad3d33111925147a044a39 Author: Mathias Froehlich Date: Sun Mar 29 18:57:45 2015 +0200 radeon: Make use of _mesa_get_viewport_xform v2. Instead of _WindowMap just use the translation and scale of the viewport transform directly. Thereby avoid dividing by _DepthMaxF again. v2: Change order of assignments. Reviewed-by: Brian Paul Signed-off-by: Mathias Froehlich --- src/mesa/drivers/dri/r200/r200_state.c | 17 +++++++++-------- src/mesa/drivers/dri/radeon/radeon_state.c | 17 +++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 8327187..e4f07b3 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/framebuffer.h" #include "main/fbobject.h" #include "main/stencil.h" +#include "main/viewport.h" #include "swrast/swrast.h" #include "vbo/vbo.h" @@ -1544,9 +1545,8 @@ void r200UpdateWindow( struct gl_context *ctx ) __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0; - const GLfloat *v = ctx->ViewportArray[0]._WindowMap.m; const GLboolean render_to_fbo = (ctx->DrawBuffer ? 
_mesa_is_user_fbo(ctx->DrawBuffer) : 0); - const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + double scale[3], translate[3]; GLfloat y_scale, y_bias; if (render_to_fbo) { @@ -1557,12 +1557,13 @@ void r200UpdateWindow( struct gl_context *ctx ) y_bias = yoffset; } - float_ui32_type sx = { v[MAT_SX] }; - float_ui32_type tx = { v[MAT_TX] + xoffset }; - float_ui32_type sy = { v[MAT_SY] * y_scale }; - float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias }; - float_ui32_type sz = { v[MAT_SZ] * depthScale }; - float_ui32_type tz = { v[MAT_TZ] * depthScale }; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + float_ui32_type sx = { scale[0] }; + float_ui32_type sy = { scale[1] * y_scale }; + float_ui32_type sz = { scale[2] }; + float_ui32_type tx = { translate[0] + xoffset }; + float_ui32_type ty = { (translate[1] * y_scale) + y_bias }; + float_ui32_type tz = { translate[2] }; R200_STATECHANGE( rmesa, vpt ); diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index e83a34d..66a50a9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/state.h" #include "main/core.h" #include "main/stencil.h" +#include "main/viewport.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -1352,9 +1353,8 @@ void radeonUpdateWindow( struct gl_context *ctx ) __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = 0.0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0; - const GLfloat *v = ctx->ViewportArray[0]._WindowMap.m; const GLboolean render_to_fbo = (ctx->DrawBuffer ? 
_mesa_is_user_fbo(ctx->DrawBuffer) : 0); - const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + double scale[3], translate[3]; GLfloat y_scale, y_bias; if (render_to_fbo) { @@ -1365,12 +1365,13 @@ void radeonUpdateWindow( struct gl_context *ctx ) y_bias = yoffset; } - float_ui32_type sx = { v[MAT_SX] }; - float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; - float_ui32_type sy = { v[MAT_SY] * y_scale }; - float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y }; - float_ui32_type sz = { v[MAT_SZ] * depthScale }; - float_ui32_type tz = { v[MAT_TZ] * depthScale }; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + float_ui32_type sx = { scale[0] }; + float_ui32_type sy = { scale[1] * y_scale }; + float_ui32_type sz = { scale[2] }; + float_ui32_type tx = { translate[0] + xoffset + SUBPIXEL_X }; + float_ui32_type ty = { (translate[1] * y_scale) + y_bias + SUBPIXEL_Y }; + float_ui32_type tz = { translate[2] }; RADEON_STATECHANGE( rmesa, vpt ); From frohlich at kemper.freedesktop.org Sun Apr 5 06:14:28 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sat, 4 Apr 2015 23:14:28 -0700 (PDT) Subject: Mesa (master): mesa: Remove the _WindowMap from gl_viewport_attrib. Message-ID: <20150405061428.C499276333@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 107ae27e57dc2a1ddc6bbb7ea101c1c60794423f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=107ae27e57dc2a1ddc6bbb7ea101c1c60794423f Author: Mathias Froehlich Date: Sun Mar 29 18:57:46 2015 +0200 mesa: Remove the _WindowMap from gl_viewport_attrib. The _WindowMap can be dropped from gl_viewport_attrib now. Simplify gl_viewport_attrib handling where possible. 
Reviewed-by: Brian Paul Signed-off-by: Mathias Froehlich --- src/mesa/main/context.c | 12 ++---------- src/mesa/main/mtypes.h | 1 - src/mesa/main/state.c | 25 ------------------------- src/mesa/main/viewport.c | 44 ++------------------------------------------ src/mesa/main/viewport.h | 3 --- 5 files changed, 4 insertions(+), 81 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index c1acda9..adf6497 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1288,7 +1288,6 @@ _mesa_free_context_data( struct gl_context *ctx ) _mesa_free_eval_data( ctx ); _mesa_free_texture_data( ctx ); _mesa_free_matrix_data( ctx ); - _mesa_free_viewport_data( ctx ); _mesa_free_pipeline_data(ctx); _mesa_free_program_data(ctx); _mesa_free_shader_state(ctx); @@ -1449,17 +1448,10 @@ _mesa_copy_context( const struct gl_context *src, struct gl_context *dst, dst->Transform = src->Transform; } if (mask & GL_VIEWPORT_BIT) { - /* Cannot use memcpy, because of pointers in GLmatrix _WindowMap */ unsigned i; for (i = 0; i < src->Const.MaxViewports; i++) { - dst->ViewportArray[i].X = src->ViewportArray[i].X; - dst->ViewportArray[i].Y = src->ViewportArray[i].Y; - dst->ViewportArray[i].Width = src->ViewportArray[i].Width; - dst->ViewportArray[i].Height = src->ViewportArray[i].Height; - dst->ViewportArray[i].Near = src->ViewportArray[i].Near; - dst->ViewportArray[i].Far = src->ViewportArray[i].Far; - _math_matrix_copy(&dst->ViewportArray[i]._WindowMap, - &src->ViewportArray[i]._WindowMap); + /* OK to memcpy */ + dst->ViewportArray[i] = src->ViewportArray[i]; } } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index c1e5dd3..3784e3b 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1450,7 +1450,6 @@ struct gl_viewport_attrib GLfloat X, Y; /**< position */ GLfloat Width, Height; /**< size */ GLdouble Near, Far; /**< Depth buffer range */ - GLmatrix _WindowMap; /**< Mapping transformation as a matrix. 
*/ }; diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index dadfb3c..6dc14b2 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -269,28 +269,6 @@ update_program_constants(struct gl_context *ctx) -static void -update_viewport_matrix(struct gl_context *ctx) -{ - const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF; - unsigned i; - - assert(depthMax > 0); - - /* Compute scale and bias values. This is really driver-specific - * and should be maintained elsewhere if at all. - * NOTE: RasterPos uses this. - */ - for (i = 0; i < ctx->Const.MaxViewports; i++) { - double scale[3], translate[3]; - - _mesa_get_viewport_xform(ctx, i, scale, translate); - _math_matrix_viewport(&ctx->ViewportArray[i]._WindowMap, - scale, translate, depthMax); - } -} - - /** * Update the ctx->Polygon._FrontBit flag. */ @@ -407,9 +385,6 @@ _mesa_update_state_locked( struct gl_context *ctx ) if (new_state & _NEW_PIXEL) _mesa_update_pixel( ctx, new_state ); - if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT)) - update_viewport_matrix(ctx); - if (new_state & (_NEW_MULTISAMPLE | _NEW_BUFFERS)) update_multisample( ctx ); diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c index 0adce9c..3f5ca79 100644 --- a/src/mesa/main/viewport.c +++ b/src/mesa/main/viewport.c @@ -72,16 +72,6 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx, ctx->ViewportArray[idx].Y = y; ctx->ViewportArray[idx].Height = height; ctx->NewState |= _NEW_VIEWPORT; - -#if 1 - /* XXX remove this someday. Currently the DRI drivers rely on - * the WindowMap matrix being up to date in the driver's Viewport - * and DepthRange functions. 
- */ - _mesa_get_viewport_xform(ctx, idx, scale, translate); - _math_matrix_viewport(&ctx->ViewportArray[idx]._WindowMap, - scale, translate, ctx->DrawBuffer->_DepthMaxF); -#endif } struct gl_viewport_inputs { @@ -140,8 +130,8 @@ _mesa_Viewport(GLint x, GLint y, GLsizei width, GLsizei height) /** - * Set new viewport parameters and update derived state (the _WindowMap - * matrix). Usually called from _mesa_Viewport(). + * Set new viewport parameters and update derived state. + * Usually called from _mesa_Viewport(). * * \param ctx GL context. * \param idx Index of the viewport to be updated. @@ -255,16 +245,6 @@ set_depth_range_no_notify(struct gl_context *ctx, unsigned idx, ctx->ViewportArray[idx].Near = CLAMP(nearval, 0.0, 1.0); ctx->ViewportArray[idx].Far = CLAMP(farval, 0.0, 1.0); ctx->NewState |= _NEW_VIEWPORT; - -#if 1 - /* XXX remove this someday. Currently the DRI drivers rely on - * the WindowMap matrix being up to date in the driver's Viewport - * and DepthRange functions. - */ - _mesa_get_viewport_xform(ctx, idx, scale, translate); - _math_matrix_viewport(&ctx->ViewportArray[idx]._WindowMap, - scale, translate, ctx->DrawBuffer->_DepthMaxF); -#endif } void @@ -388,7 +368,6 @@ _mesa_DepthRangeIndexed(GLuint index, GLclampd nearval, GLclampd farval) */ void _mesa_init_viewport(struct gl_context *ctx) { - GLfloat depthMax = 65535.0F; /* sorf of arbitrary */ unsigned i; ctx->Transform.ClipOrigin = GL_LOWER_LEFT; @@ -398,8 +377,6 @@ void _mesa_init_viewport(struct gl_context *ctx) * so just initialize all of them. 
*/ for (i = 0; i < MAX_VIEWPORTS; i++) { - double scale[3], translate[3]; - /* Viewport group */ ctx->ViewportArray[i].X = 0; ctx->ViewportArray[i].Y = 0; @@ -407,27 +384,10 @@ void _mesa_init_viewport(struct gl_context *ctx) ctx->ViewportArray[i].Height = 0; ctx->ViewportArray[i].Near = 0.0; ctx->ViewportArray[i].Far = 1.0; - _math_matrix_ctr(&ctx->ViewportArray[i]._WindowMap); - - _mesa_get_viewport_xform(ctx, i, scale, translate); - _math_matrix_viewport(&ctx->ViewportArray[i]._WindowMap, - scale, translate, depthMax); } } -/** - * Free the context viewport attribute group data. - * \param ctx the GL context. - */ -void _mesa_free_viewport_data(struct gl_context *ctx) -{ - unsigned i; - - for (i = 0; i < MAX_VIEWPORTS; i++) - _math_matrix_dtr(&ctx->ViewportArray[i]._WindowMap); -} - extern void GLAPIENTRY _mesa_ClipControl(GLenum origin, GLenum depth) { diff --git a/src/mesa/main/viewport.h b/src/mesa/main/viewport.h index 426e194..899dc2d 100644 --- a/src/mesa/main/viewport.h +++ b/src/mesa/main/viewport.h @@ -68,9 +68,6 @@ extern void _mesa_init_viewport(struct gl_context *ctx); -extern void -_mesa_free_viewport_data(struct gl_context *ctx); - extern void GLAPIENTRY _mesa_ClipControl(GLenum origin, GLenum depth); From frohlich at kemper.freedesktop.org Sun Apr 5 06:14:28 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sat, 4 Apr 2015 23:14:28 -0700 (PDT) Subject: Mesa (master): i965: Implement support for ARB_clip_control. Message-ID: <20150405061428.D156776333@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fdd90fcb15c109f3dcbf5e46fa8a1f8284b9c266 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fdd90fcb15c109f3dcbf5e46fa8a1f8284b9c266 Author: Mathias Fröhlich Date: Sun Mar 29 16:52:57 2015 +0200 i965: Implement support for ARB_clip_control. Switch between the two clip space definitions already available in hardware. Update winding order dependent state according to the clip control state.
This change did not introduce new piglit quick.test regressions on an Ivybridge Mobile and a GM45 Express chipset. Also it enables and passes the clip-control and clip-control-depth-precision tests on these two chipsets. Reviewed-by: Kenneth Graunke Signed-off-by: Mathias Froehlich --- docs/GL3.txt | 2 +- docs/relnotes/10.6.0.html | 1 + src/mesa/drivers/dri/i965/brw_clip.c | 7 ++----- src/mesa/drivers/dri/i965/brw_clip_state.c | 5 ++++- src/mesa/drivers/dri/i965/brw_sf.c | 2 +- src/mesa/drivers/dri/i965/brw_sf_state.c | 6 +++--- src/mesa/drivers/dri/i965/gen6_clip_state.c | 7 +++++-- src/mesa/drivers/dri/i965/gen6_sf_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sf_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_sf_state.c | 2 +- src/mesa/drivers/dri/i965/intel_extensions.c | 1 + 11 files changed, 21 insertions(+), 16 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 94166e2..456d76b 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -188,7 +188,7 @@ GL 4.4, GLSL 4.40: GL 4.5, GLSL 4.50: GL_ARB_ES3_1_compatibility not started - GL_ARB_clip_control DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) + GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe) GL_ARB_cull_distance not started GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 22201e1..2378e3d 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -51,6 +51,7 @@ Note: some of the new features are only available with certain drivers.
  • GL_ARB_instanced_arrays on freedreno
  • GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe
  • GL_EXT_draw_buffers2 on freedreno
  • +
  • GL_ARB_clip_control on i965
  • Bug fixes

    diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 3fef38c..de78f46 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -224,8 +224,7 @@ brw_upload_clip_prog(struct brw_context *brw) key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; } - switch (ctx->Polygon.FrontFace) { - case GL_CCW: + if (!ctx->Polygon._FrontBit) { key.fill_ccw = fill_front; key.fill_cw = fill_back; key.offset_ccw = offset_front; @@ -233,8 +232,7 @@ brw_upload_clip_prog(struct brw_context *brw) if (ctx->Light.Model.TwoSide && key.fill_cw != CLIP_CULL) key.copy_bfc_cw = 1; - break; - case GL_CW: + } else { key.fill_cw = fill_front; key.fill_ccw = fill_back; key.offset_cw = offset_front; @@ -242,7 +240,6 @@ brw_upload_clip_prog(struct brw_context *brw) if (ctx->Light.Model.TwoSide && key.fill_ccw != CLIP_CULL) key.copy_bfc_ccw = 1; - break; } } } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 4f241ac..3223834 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -147,7 +147,10 @@ brw_upload_clip_unit(struct brw_context *brw) clip->clip5.viewport_z_clip_enable = 1; clip->clip5.viewport_xy_clip_enable = 1; clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip->clip5.api_mode = BRW_CLIP_API_OGL; + if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) + clip->clip5.api_mode = BRW_CLIP_API_DX; + else + clip->clip5.api_mode = BRW_CLIP_API_OGL; clip->clip5.clip_mode = brw->clip.prog_data->clip_mode; if (brw->is_g4x) diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index a41a4ad..d5395de 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -204,7 +204,7 @@ brw_upload_sf_prog(struct brw_context *brw) * face orientation, just as we invert the viewport in * sf_unit_create_from_key(). 
*/ - key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) != render_to_fbo; + key.frontface_ccw = ctx->Polygon._FrontBit == render_to_fbo; } if (!brw_search_cache(&brw->cache, BRW_CACHE_SF_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 2ed418b..014b434 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -183,10 +183,10 @@ static void upload_sf_unit( struct brw_context *brw ) sf->sf6.scissor = 1; /* _NEW_POLYGON */ - if (ctx->Polygon.FrontFace == GL_CCW) - sf->sf5.front_winding = BRW_FRONTWINDING_CCW; - else + if (ctx->Polygon._FrontBit) sf->sf5.front_winding = BRW_FRONTWINDING_CW; + else + sf->sf5.front_winding = BRW_FRONTWINDING_CCW; /* _NEW_BUFFERS * The viewport is inverted for rendering to a FBO, and that inverts diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index e8c16ca..aaf90df 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -54,7 +54,7 @@ upload_clip_state(struct brw_context *brw) if (brw->gen == 7) { /* _NEW_POLYGON */ - if ((ctx->Polygon.FrontFace == GL_CCW) ^ _mesa_is_user_fbo(fb)) + if (ctx->Polygon._FrontBit == _mesa_is_user_fbo(fb)) dw1 |= GEN7_CLIP_WINDING_CCW; if (ctx->Polygon.CullFlag) { @@ -95,6 +95,10 @@ upload_clip_state(struct brw_context *brw) /* _NEW_TRANSFORM */ dw2 |= (ctx->Transform.ClipPlanesEnabled << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT); + if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) + dw2 |= GEN6_CLIP_API_D3D; + else + dw2 |= GEN6_CLIP_API_OGL; dw2 |= GEN6_CLIP_GB_TEST; @@ -170,7 +174,6 @@ upload_clip_state(struct brw_context *brw) OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); OUT_BATCH(enable | - GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | dw2); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index f9d8d27..ea5c47a 
100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -290,7 +290,7 @@ upload_sf_state(struct brw_context *brw) dw4 = 0; /* _NEW_POLYGON */ - if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + if (ctx->Polygon._FrontBit == render_to_fbo) dw2 |= GEN6_SF_WINDING_CCW; if (ctx->Polygon.OffsetFill) diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index c9815b0..69853e6 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -120,7 +120,7 @@ upload_sf_state(struct brw_context *brw) dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); /* _NEW_POLYGON */ - if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + if (ctx->Polygon._FrontBit == render_to_fbo) dw1 |= GEN6_SF_WINDING_CCW; if (ctx->Polygon.OffsetFill) diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 27116f7..52a21b6 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -229,7 +229,7 @@ upload_raster(struct brw_context *brw) bool render_to_fbo = _mesa_is_user_fbo(brw->ctx.DrawBuffer); /* _NEW_POLYGON */ - if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) + if (ctx->Polygon._FrontBit == render_to_fbo) dw1 |= GEN8_RASTER_FRONT_WINDING_CCW; if (ctx->Polygon.CullFlag) { diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 608bfac9..48064e1 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -178,6 +178,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_buffer_storage = true; ctx->Extensions.ARB_clear_texture = true; + ctx->Extensions.ARB_clip_control = true; ctx->Extensions.ARB_copy_image = true; ctx->Extensions.ARB_depth_buffer_float = true; ctx->Extensions.ARB_depth_clamp = true; 
From frohlich at kemper.freedesktop.org Sun Apr 5 06:14:28 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sat, 4 Apr 2015 23:14:28 -0700 (PDT) Subject: Mesa (master): i965: Make use of _mesa_get_viewport_xform. Message-ID: <20150405061428.A33C176333@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a8ceb8e450354083b0b4141cd7fa0174a4d18f72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a8ceb8e450354083b0b4141cd7fa0174a4d18f72 Author: Mathias Froehlich Date: Sun Mar 29 18:57:45 2015 +0200 i965: Make use of _mesa_get_viewport_xform. Instead of _WindowMap just use the translation and scale of the viewport transform directly. Thereby avoid dividing by _DepthMaxF again. Reviewed-by: Brian Paul Signed-off-by: Mathias Froehlich --- src/mesa/drivers/dri/i965/brw_sf_state.c | 17 +++++++++-------- src/mesa/drivers/dri/i965/gen6_viewport_state.c | 17 +++++++++-------- src/mesa/drivers/dri/i965/gen7_viewport_state.c | 17 +++++++++-------- src/mesa/drivers/dri/i965/gen8_viewport_state.c | 17 +++++++++-------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index e055837..2ed418b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -34,6 +34,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/fbobject.h" +#include "main/viewport.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -42,11 +43,10 @@ static void upload_sf_vp(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; + double scale[3], translate[3]; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - const GLfloat *v = ctx->ViewportArray[0]._WindowMap.m; sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, sizeof(*sfv), 32, &brw->sf.vp_offset); @@ -63,12 
+63,13 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - sfv->viewport.m00 = v[MAT_SX]; - sfv->viewport.m11 = v[MAT_SY] * y_scale; - sfv->viewport.m22 = v[MAT_SZ] * depth_scale; - sfv->viewport.m30 = v[MAT_TX]; - sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv->viewport.m32 = v[MAT_TZ] * depth_scale; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + sfv->viewport.m00 = scale[0]; + sfv->viewport.m11 = scale[1] * y_scale; + sfv->viewport.m22 = scale[2]; + sfv->viewport.m30 = translate[0]; + sfv->viewport.m31 = translate[1] * y_scale + y_bias; + sfv->viewport.m32 = translate[2]; /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT * for DrawBuffer->_[XY]{min,max} diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index d804452..0c63283 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -30,6 +30,7 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/viewport.h" /* The clip VP defines the guardband region where expensive clipping is skipped * and fragments are allowed to be generated and clipped out cheaply by the SF. 
@@ -78,11 +79,10 @@ static void gen6_upload_sf_vp(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport *sfv; GLfloat y_scale, y_bias; + double scale[3], translate[3]; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); - const GLfloat *v = ctx->ViewportArray[0]._WindowMap.m; sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE, sizeof(*sfv), 32, &brw->sf.vp_offset); @@ -98,12 +98,13 @@ gen6_upload_sf_vp(struct brw_context *brw) } /* _NEW_VIEWPORT */ - sfv->viewport.m00 = v[MAT_SX]; - sfv->viewport.m11 = v[MAT_SY] * y_scale; - sfv->viewport.m22 = v[MAT_SZ] * depth_scale; - sfv->viewport.m30 = v[MAT_TX]; - sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv->viewport.m32 = v[MAT_TZ] * depth_scale; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + sfv->viewport.m00 = scale[0]; + sfv->viewport.m11 = scale[1] * y_scale; + sfv->viewport.m22 = scale[2]; + sfv->viewport.m30 = translate[0]; + sfv->viewport.m31 = translate[1] * y_scale + y_bias; + sfv->viewport.m32 = translate[2]; brw->ctx.NewDriverState |= BRW_NEW_SF_VP; } diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c index bd11c3a..eb59684 100644 --- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c @@ -26,12 +26,12 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/viewport.h" static void gen7_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; GLfloat y_scale, y_bias; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); struct gen7_sf_clip_viewport *vp; @@ -52,7 +52,8 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { - const GLfloat *const v = 
ctx->ViewportArray[i]._WindowMap.m; + double scale[3], translate[3]; + _mesa_get_viewport_xform(ctx, i, scale, translate); /* According to the "Vertex X,Y Clamping and Quantization" section of * the Strips and Fans documentation, objects must not have a @@ -76,12 +77,12 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw) vp[i].guardband.ymax = gby; /* _NEW_VIEWPORT */ - vp[i].viewport.m00 = v[MAT_SX]; - vp[i].viewport.m11 = v[MAT_SY] * y_scale; - vp[i].viewport.m22 = v[MAT_SZ] * depth_scale; - vp[i].viewport.m30 = v[MAT_TX]; - vp[i].viewport.m31 = v[MAT_TY] * y_scale + y_bias; - vp[i].viewport.m32 = v[MAT_TZ] * depth_scale; + vp[i].viewport.m00 = scale[0]; + vp[i].viewport.m11 = scale[1] * y_scale; + vp[i].viewport.m22 = scale[2]; + vp[i].viewport.m30 = translate[0]; + vp[i].viewport.m31 = translate[1] * y_scale + y_bias; + vp[i].viewport.m32 = translate[2]; } BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c b/src/mesa/drivers/dri/i965/gen8_viewport_state.c index 93198c4..322e466 100644 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen8_viewport_state.c @@ -26,12 +26,12 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" #include "main/fbobject.h" +#include "main/viewport.h" static void gen8_upload_sf_clip_viewport(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; float y_scale, y_bias; const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); @@ -51,15 +51,16 @@ gen8_upload_sf_clip_viewport(struct brw_context *brw) } for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) { - const GLfloat *const v = ctx->ViewportArray[i]._WindowMap.m; + double scale[3], translate[3]; + _mesa_get_viewport_xform(ctx, i, scale, translate); /* _NEW_VIEWPORT: Viewport Matrix Elements */ - vp[0] = v[MAT_SX]; /* m00 */ - vp[1] = v[MAT_SY] * y_scale; /* m11 */ - vp[2] = v[MAT_SZ] * depth_scale; /* m22 */ - vp[3] = v[MAT_TX]; /* 
m30 */ - vp[4] = v[MAT_TY] * y_scale + y_bias; /* m31 */ - vp[5] = v[MAT_TZ] * depth_scale; /* m32 */ + vp[0] = scale[0]; /* m00 */ + vp[1] = scale[1] * y_scale; /* m11 */ + vp[2] = scale[2]; /* m22 */ + vp[3] = translate[0]; /* m30 */ + vp[4] = translate[1] * y_scale + y_bias; /* m31 */ + vp[5] = translate[2]; /* m32 */ /* Reserved */ vp[6] = 0; From frohlich at kemper.freedesktop.org Sun Apr 5 06:14:28 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sat, 4 Apr 2015 23:14:28 -0700 (PDT) Subject: Mesa (master): tnl: Maintain the _WindowMap matrix in TNLcontext v2. Message-ID: <20150405061428.B5F907635B@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 29e6c7dbc5bacf4f2b741333ac56469a00164e65 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29e6c7dbc5bacf4f2b741333ac56469a00164e65 Author: Mathias Froehlich Date: Sun Mar 29 18:57:45 2015 +0200 tnl: Maintain the _WindowMap matrix in TNLcontext v2. This is the only real user of _WindowMap which has the depth buffer scaling multiplied in. Maintain the _WindowMap of the one and only viewport inside TNLcontext. v2: Remove unneeded parentheses. 
Reviewed-by: Brian Paul Signed-off-by: Mathias Froehlich --- src/mesa/swrast_setup/ss_context.c | 5 +++-- src/mesa/tnl/t_context.c | 12 ++++++++++++ src/mesa/tnl/t_context.h | 1 + src/mesa/tnl/t_rasterpos.c | 13 ++++++------- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/mesa/swrast_setup/ss_context.c b/src/mesa/swrast_setup/ss_context.c index 4fc90c3..74b1da3 100644 --- a/src/mesa/swrast_setup/ss_context.c +++ b/src/mesa/swrast_setup/ss_context.c @@ -167,7 +167,7 @@ setup_vertex_format(struct gl_context *ctx) EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, pointSize ); _tnl_install_attrs( ctx, map, e, - ctx->ViewportArray[0]._WindowMap.m, + tnl->_WindowMap.m, sizeof(SWvertex) ); swsetup->last_index_bitset = index_bitset; @@ -265,7 +265,8 @@ _swsetup_Wakeup( struct gl_context *ctx ) void _swsetup_Translate( struct gl_context *ctx, const void *vertex, SWvertex *dest ) { - const GLfloat *m = ctx->ViewportArray[0]._WindowMap.m; + TNLcontext *tnl = TNL_CONTEXT(ctx); + const GLfloat *m = tnl->_WindowMap.m; GLfloat tmp[4]; GLuint i; diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index bc705d7..5b9dd54 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -35,6 +35,7 @@ #include "math/m_translate.h" #include "math/m_xform.h" #include "main/state.h" +#include "main/viewport.h" #include "tnl.h" #include "t_context.h" @@ -69,6 +70,8 @@ _tnl_CreateContext( struct gl_context *ctx ) _tnl_install_pipeline( ctx, _tnl_default_pipeline ); } + _math_matrix_ctr(&tnl->_WindowMap); + tnl->NeedNdcCoords = GL_TRUE; tnl->AllowVertexFog = GL_TRUE; tnl->AllowPixelFog = GL_TRUE; @@ -108,6 +111,8 @@ _tnl_DestroyContext( struct gl_context *ctx ) struct tnl_shine_tab *s, *tmps; TNLcontext *tnl = TNL_CONTEXT(ctx); + _math_matrix_dtr(&tnl->_WindowMap); + /* Free lighting shininess exponentiation table */ foreach_s( s, tmps, tnl->_ShineTabList ) { free( s ); @@ -182,6 +187,13 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state ) } } 
} + + if (new_state & (_NEW_VIEWPORT | _NEW_BUFFERS)) { + double scale[3], translate[3]; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + _math_matrix_viewport(&tnl->_WindowMap, scale, translate, + ctx->DrawBuffer->_DepthMaxF); + } } diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index e89a7f8..e7adb5f 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -514,6 +514,7 @@ typedef struct /* Clipspace/ndc/window vertex managment: */ struct tnl_clipspace clipspace; + GLmatrix _WindowMap; /* Probably need a better configuration mechanism: */ diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 1cd3981..d4b45ba 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -30,6 +30,7 @@ #include "main/macros.h" #include "util/simple_list.h" #include "main/mtypes.h" +#include "main/viewport.h" #include "math/m_matrix.h" #include "tnl/tnl.h" @@ -377,6 +378,7 @@ _tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) GLfloat eye[4], clip[4], ndc[3], d; GLfloat *norm, eyenorm[3]; GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; + double scale[3], translate[3]; /* apply modelview matrix: eye = MV * obj */ TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); @@ -409,13 +411,10 @@ _tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) ndc[1] = clip[1] * d; ndc[2] = clip[2] * d; /* wincoord = viewport_mapping(ndc) */ - ctx->Current.RasterPos[0] = (ndc[0] * ctx->ViewportArray[0]._WindowMap.m[MAT_SX] - + ctx->ViewportArray[0]._WindowMap.m[MAT_TX]); - ctx->Current.RasterPos[1] = (ndc[1] * ctx->ViewportArray[0]._WindowMap.m[MAT_SY] - + ctx->ViewportArray[0]._WindowMap.m[MAT_TY]); - ctx->Current.RasterPos[2] = (ndc[2] * ctx->ViewportArray[0]._WindowMap.m[MAT_SZ] - + ctx->ViewportArray[0]._WindowMap.m[MAT_TZ]) - / ctx->DrawBuffer->_DepthMaxF; + _mesa_get_viewport_xform(ctx, 0, scale, translate); + ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0]; + 
ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1]; + ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2]; ctx->Current.RasterPos[3] = clip[3]; if (ctx->Transform.DepthClamp) { From frohlich at kemper.freedesktop.org Sun Apr 5 07:42:44 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Sun, 5 Apr 2015 00:42:44 -0700 (PDT) Subject: Mesa (master): mesa: Remove unused variables left over from 107ae27e57d. Message-ID: <20150405074244.1057676333@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 24b78fe54e1498dc3f66cb616561894039806642 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=24b78fe54e1498dc3f66cb616561894039806642 Author: Mathias Froehlich Date: Sun Apr 5 09:21:51 2015 +0200 mesa: Remove unused variables left over from 107ae27e57d. Reviewed-by: Dave Airlie Signed-off-by: Mathias Froehlich --- src/mesa/main/viewport.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c index 3f5ca79..b270630 100644 --- a/src/mesa/main/viewport.c +++ b/src/mesa/main/viewport.c @@ -40,8 +40,6 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx, GLfloat x, GLfloat y, GLfloat width, GLfloat height) { - double scale[3], translate[3]; - /* clamp width and height to the implementation dependent range */ width = MIN2(width, (GLfloat) ctx->Const.MaxViewportWidth); height = MIN2(height, (GLfloat) ctx->Const.MaxViewportHeight); @@ -236,8 +234,6 @@ static void set_depth_range_no_notify(struct gl_context *ctx, unsigned idx, GLclampd nearval, GLclampd farval) { - double scale[3], translate[3]; - if (ctx->ViewportArray[idx].Near == nearval && ctx->ViewportArray[idx].Far == farval) return; From robclark at kemper.freedesktop.org Sun Apr 5 21:35:54 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sun, 5 Apr 2015 14:35:54 -0700 (PDT) Subject: Mesa (master): 26 new commits Message-ID: <20150405213554.5B8787624F@kemper.freedesktop.org> URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8b0b81339b9c3806981ee2ec7c08501bbd8bb7a3 Author: Rob Clark Date: Thu Mar 12 17:18:33 2015 -0400 freedreno/ir3: add NIR compiler The NIR compiler frontend is an alternative to the TGSI f/e, producing the same ir3 IR and using the same backend passes for scheduling, etc. It is not enabled by default yet, as there are still some regressions. To enable, use 'FD_MESA_DEBUG=nir'. It is enough to use with, for example, xonotic or supertuxkart. With the NIR f/e, scalarizing and a number of other lowering steps happen in NIR, so we don't have to do them in ir3. Which simplifies the f/e and allows the lowered instructions to pass through other optimization stages. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=700d949ea162a5036ffc3056adaa8eb3fecc9c2e Author: Ilia Mirkin Date: Tue Mar 3 20:36:39 2015 -0500 freedreno/a3xx: don't decode srgb on mem2gmem Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b060b56772cdb349478757382de036a20a2402ba Author: Ilia Mirkin Date: Sat Apr 4 02:53:52 2015 -0400 freedreno/a3xx: pass sprite coord mode through to program emit Use the correct sprite replacement depending on the flip of the coord mode, using either T or 1-T depending on whether we have an upper-left or lower-left coordinate origin. This fixes all the point sprite piglits. 
Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1de72dfc8a2014069edd1b3d3d46dad478d0680a Author: Ilia Mirkin Date: Tue Mar 31 11:51:00 2015 -0400 freedreno/a3xx: add UBO support Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7811f56c205b113dd820034a99ff3aaa20af636 Author: Ilia Mirkin Date: Sat Apr 4 13:37:45 2015 -0400 freedreno/ir3: insert nop between sfu/mem operations Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=14dfd8cc4344a1bb15a63179b497d14eaec9cc0d Author: Ilia Mirkin Date: Fri Apr 3 22:29:44 2015 -0400 freedreno: dirty context when reallocating a bound bo Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bde2045fa247b4d1de98a3bc7585d1b60f9b58b7 Author: Ilia Mirkin Date: Fri Apr 3 02:15:17 2015 -0400 freedreno: keep track of buffer valid ranges Copies nouveau_buffer and radeon_buffer. This allows a write to proceed to an uninitialized part of a buffer even when the GPU is using the previously-initialized portions. Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dacf22e0a34d4dc2595f3cb0dbee52318dc9d0d7 Author: Ilia Mirkin Date: Thu Apr 2 20:48:44 2015 -0400 freedreno: mark resources as being read so that writes flush the queue Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e1445c8f3df7608ba4522f8d088170de4ec788c Author: Ilia Mirkin Date: Fri Apr 3 23:39:38 2015 -0400 freedreno: don't bother setting resource timestamps Waiting on a bo being ready is handled in fd_bo_cpu_prep. No need to keep separate timestamps around. 
Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1fee3061d531a9f4068952209926ad518aab07ee Author: Ilia Mirkin Date: Fri Apr 3 02:25:58 2015 -0400 freedreno: add a reading flag to indicate gpu is reading rsc Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea0952a9db1b5887f915d8f750f5fa9c45719976 Author: Ilia Mirkin Date: Thu Apr 2 22:28:21 2015 -0400 freedreno: fix resource flushing confusion A resource flush is an upload of a hypothetically-staging texture to the GPU. For a UMA system, this will largely be a no-op or cache-maintenance. Move the render flush logic into transfer_map where it belongs, and clear out the transfer_flush function. Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bfb0a8eb6967065be92e40ba620fc6fededde51a Author: Ilia Mirkin Date: Thu Apr 2 22:26:03 2015 -0400 freedreno: remove tex_resource pipe_sampler_view already contains a texture, remove the redundant tex_resource member which pointed at the same thing. Signed-off-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cd9c94ce49a2c1a8826c1b1bf2fb73dc595131d Author: Rob Clark Date: Wed Apr 1 12:31:46 2015 -0400 freedreno/ir3: handle FRAG IN's without interpolation specified Fall back to picking based on semantic name. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f513f006ceed825fa09e606d99190e490e559885 Author: Rob Clark Date: Wed Apr 1 10:52:29 2015 -0400 freedreno/ir3/cmdline: add @const headers for immediates Since NIR f/e currently encodes immediates in instructions (rather than passing via const), we need to ensure that when const's are used they get initialized to the proper values. Otherwise comparing NIR to TGSI compiler, it will use proper immediate values in one case, and randomly initialize values in the other. Which confuses ir3test.
Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6bc12bb5fd29a70967d467a19153bda9404653bd Author: Rob Clark Date: Wed Apr 1 10:28:58 2015 -0400 freedreno/ir3/cmdline: remove hack for old compiler Since we dropped the old compiler, we don't need this hack anymore. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f370e95421f553ace931a02743c96be80fd62dc8 Author: Rob Clark Date: Sun Mar 29 11:24:57 2015 -0400 freedreno/ir3: handle const/immed/abs/neg in cp Be smarter about propagating copies from const or immed, or with abs/neg modifiers. Also, realize that absneg.s and absneg.f are really "fancy" mov instructions. This opens up the possibility to remove more copies. It helps the TGSI frontend a bit, but will be really needed for the NIR f/e which builds everything up in SSA form (ie. will *always* insert a mov from const or immediate). Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=104713d9f2dced94a427004a25c54b2c7feee166 Author: Rob Clark Date: Sun Mar 29 11:18:29 2015 -0400 freedreno/ir3: split float/int abs/neg Even though in the end, they map to the same bits, the backend will need to be able to differentiate float abs/neg vs integer abs/neg. Rather than making the backend figure it out based on instruction opcode (which when combined with mov/absneg instructions, can be awkward), just split out different flags for each so the frontend can signal its intentions more clearly. Also, since (neg) for bitwise ops is actually a bitwise-not, split it out into bnot flag. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=203f37540a698a812f0a66e2f3f1fff954af22ab Author: Rob Clark Date: Wed Mar 18 19:26:13 2015 -0400 freedreno/ir3: add ir3 builder helpers Add helpers for constructing SSA forms of instructions. Only partial cat5/cat6 coverage.. but we can add stuff as needed.
Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b1c9fb9fcaf34923f69d671fdba0a35ea581b3a0 Author: Rob Clark Date: Mon Mar 30 14:14:31 2015 -0400 freedreno/ir3: fix sam argument order comment Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=101142c401038684ca971015d7576029b9dc5e63 Author: Rob Clark Date: Fri Apr 3 14:01:47 2015 -0400 xa: support for drivers which use NIR We need to pull in libnir.la and its dependency libglsl_util.la. Also, _mesa_error_no_memory() must be defined. Fortunately with libnir.la (vs pulling in all of libglsl.la) we don't also need libstdc++. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c857727a12a45392b609949046b4abb25e3d7da Author: Rob Clark Date: Fri Apr 3 13:57:46 2015 -0400 build: add libnir.la If we want to use NIR from state trackers that don't already pull in the whole of glsl (ie. anything other than mesa state tracker), we need a separate more minimal libnir. Possibly NIR should be better split out from glsl, but for now, generate a second smaller libnir.la for those who just want NIR but not all of glsl. Signed-off-by: Rob Clark URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52282fa42d34051b042d20b52e84f32274691e36 Author: Rob Clark Date: Sat Apr 4 18:15:24 2015 -0400 gallium/ttn: MOD is an integer instruction Signed-off-by: Rob Clark Reviewed-by: Eric Anholt Date: Sat Apr 4 18:15:00 2015 -0400 gallium/ttn: add UMAD Signed-off-by: Rob Clark Reviewed-by: Eric Anholt URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2ecc95e44c5798a15cde4aa67887bc562756ccd Author: Rob Clark Date: Tue Mar 31 17:03:39 2015 -0400 nir: add lowering for idiv/udiv/umod Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD(). See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an adaptation of the nv50 code from Ilia Mirkin).
A python/numpy script which implements the same algorithm (and is possibly useful for debugging or analysis) can be found here: http://people.freedesktop.org/~robclark/div-lowering.py I've tested this on i965 hacked up to insert the idiv lowering pass, and on freedreno with NIR frontend. Signed-off-by: Rob Clark Tested-by: Eric Anholt (vc4) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7880bea2fb587871faead78f624920ad5e8b89f7 Author: Rob Clark Date: Tue Mar 31 15:13:00 2015 -0400 nir: fix typo for f2b/i2b/b2i expressions (v2) v2: discovered that i2b/b2i are also confused Signed-off-by: Rob Clark Reviewed-by: Matt Turner Reviewed-by: Connor Abbott URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6829d76e0285f9ed8091990b908d2e7dc37c5218 Author: Rob Clark Date: Tue Mar 31 11:25:19 2015 -0400 nir: add option to lower slt/sge/seq/sne In freedreno these get implemented as the matching f* instruction plus a u2f to convert the result to float 1.0/0.0. But it is fewer lines of code to just let nir_opt_algebraic handle this for us, plus it opens up some small window for other opt passes to improve (ie. if some shader ended up with both a flt and slt with same src args, for example).
v2: use b2f rather than u2f Signed-off-by: Rob Clark Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner From itoral at kemper.freedesktop.org Mon Apr 6 06:11:50 2015 From: itoral at kemper.freedesktop.org (Iago Toral Quiroga) Date: Sun, 5 Apr 2015 23:11:50 -0700 (PDT) Subject: Mesa (master): i965: Make sure we always mark array surfaces as such Message-ID: <20150406061150.BFAE07624F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 47597f8f5cfe950093110d2b777923ef85c5d24f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=47597f8f5cfe950093110d2b777923ef85c5d24f Author: Iago Toral Quiroga Date: Tue Feb 3 12:34:29 2015 +0100 i965: Make sure we always mark array surfaces as such Even if they only have one slice, otherwise textureSize() won't produce correct results for the depth value. Fixes 10 dEQP tests in this category: dEQP-GLES3.functional.shaders.texture_functions.texturesize.sampler2darray* Reviewed-by: Mark Janes --- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 4 +++- src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 7d78515..d9361d3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -24,6 +24,7 @@ #include "main/blend.h" #include "main/samplerobj.h" #include "main/texformat.h" +#include "main/teximage.h" #include "program/prog_parameter.h" #include "intel_mipmap_tree.h" @@ -301,7 +302,8 @@ gen7_update_texture_surface(struct gl_context *ctx, if (mt->align_w == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; - if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D) + if (_mesa_is_array_texture(tObj->Target) || + tObj->Target == GL_TEXTURE_CUBE_MAP) surf[0] |= GEN7_SURFACE_IS_ARRAY; /* if this is a view with restricted NumLayers, then diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 7f82f53..0007c95 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -25,6 +25,7 @@ #include "main/mtypes.h" #include "main/samplerobj.h" #include "main/texformat.h" +#include "main/teximage.h" #include "program/prog_parameter.h" #include "intel_mipmap_tree.h" @@ -208,7 +209,8 @@ gen8_update_texture_surface(struct gl_context *ctx, surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; } - if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D) + if (_mesa_is_array_texture(tObj->Target) || + tObj->Target == GL_TEXTURE_CUBE_MAP) surf[0] |= GEN8_SURFACE_IS_ARRAY; surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; From itoral at kemper.freedesktop.org Mon Apr 6 06:55:45 2015 From: itoral at kemper.freedesktop.org (Iago Toral Quiroga) Date: Sun, 5 Apr 2015 23:55:45 -0700 (PDT) Subject: Mesa (master): glsl: respect the source number set by #line Message-ID: <20150406065545.7D6D37624F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c0a7014601ca93d30ba9bf1ec3c6d04da904d503 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c0a7014601ca93d30ba9bf1ec3c6d04da904d503 Author: Antia Puentes Date: Tue Mar 17 19:28:25 2015 +0100 glsl: respect the source number set by #line >From GLSL 1.30.10, section 3.3 (Preprocessor): "#line line source-string-number ... After processing this directive (including its new-line), the implementation will behave as if it is compiling at ... source string number source-string-number. Subsequent source strings will be numbered sequentially, until another #line directive overrides that numbering." In the previous implementation the source number was always zero. Subsequent source strings are still not numbered sequentially, because in the glShaderSource implementation we are concatenating the source code strings into one long string. 
Partially fixes https://bugs.freedesktop.org/show_bug.cgi?id=88815 Reviewed-by: Kenneth Graunke --- src/glsl/glsl_lexer.ll | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll index 8dc3d10..f0e047e 100644 --- a/src/glsl/glsl_lexer.ll +++ b/src/glsl/glsl_lexer.ll @@ -36,14 +36,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state *, const char *); #define YY_USER_ACTION \ do { \ - yylloc->source = 0; \ yylloc->first_column = yycolumn + 1; \ yylloc->first_line = yylloc->last_line = yylineno + 1; \ yycolumn += yyleng; \ yylloc->last_column = yycolumn + 1; \ } while(0); -#define YY_USER_INIT yylineno = 0; yycolumn = 0; +#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0; /* A macro for handling reserved words and keywords across language versions. * From itoral at kemper.freedesktop.org Mon Apr 6 06:55:45 2015 From: itoral at kemper.freedesktop.org (Iago Toral Quiroga) Date: Sun, 5 Apr 2015 23:55:45 -0700 (PDT) Subject: Mesa (master): glsl: Update the #line behaviour on GLSL 3.30+ and GLSL ES+ Message-ID: <20150406065545.89D387635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 490621f0f2814ff1713c74f030acaff3f19cf8fb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=490621f0f2814ff1713c74f030acaff3f19cf8fb Author: Antia Puentes Date: Tue Mar 17 19:12:23 2015 +0100 glsl: Update the #line behaviour on GLSL 3.30+ and GLSL ES+ >From GLSL 3.30 and GLSL ES 1.00 on, after processing the line directive (including its new-line), the implementation should behave as if it is compiling at the line number passed as argument. In previous versions, it behaved as if compiling at the passed line number + 1. 
Partially fixes https://bugs.freedesktop.org/show_bug.cgi?id=88815 Reviewed-by: Kenneth Graunke --- src/glsl/glsl_lexer.ll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll index f0e047e..2785ed1 100644 --- a/src/glsl/glsl_lexer.ll +++ b/src/glsl/glsl_lexer.ll @@ -187,6 +187,15 @@ HASH ^{SPC}#{SPC} * one-based. */ yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + yylloc->source = strtol(ptr, NULL, 0); } {HASH}line{SPCP}{INT}{SPC}$ { @@ -202,6 +211,14 @@ HASH ^{SPC}#{SPC} * one-based. */ yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; } ^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) { BEGIN PP; From mattst88 at kemper.freedesktop.org Mon Apr 6 17:20:40 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 6 Apr 2015 10:20:40 -0700 (PDT) Subject: Mesa (master): glsl: Remove never used sin_reduced/cos_reduced. Message-ID: <20150406172040.250C676250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5c71cf85315320fcb8b1cc2178f7f2d76b3beeef URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c71cf85315320fcb8b1cc2178f7f2d76b3beeef Author: Matt Turner Date: Wed Apr 1 17:44:16 2015 -0700 glsl: Remove never used sin_reduced/cos_reduced. These were added in commit f2616e56, presumably in preparation for translating ARB vp/fp into GLSL IR. 
That never happened, and neither did a lowering pass that actually generated these instructions. Reviewed-by: Jason Ekstrand --- src/glsl/ir.cpp | 4 ---- src/glsl/ir.h | 2 -- src/glsl/ir_constant_expression.cpp | 2 -- src/glsl/ir_validate.cpp | 2 -- src/glsl/nir/glsl_to_nir.cpp | 6 ------ src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 2 -- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 -- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 -- src/mesa/program/ir_to_mesa.cpp | 6 ------ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 ------ 10 files changed, 34 deletions(-) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 4b8ca9b..9e32385 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -240,8 +240,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: - case ir_unop_sin_reduced: - case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: @@ -542,8 +540,6 @@ static const char *const operator_strs[] = { "round_even", "sin", "cos", - "sin_reduced", - "cos_reduced", "dFdx", "dFdxCoarse", "dFdxFine", diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 09275b3..fab1cd2 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1314,8 +1314,6 @@ enum ir_expression_operation { /*@{*/ ir_unop_sin, ir_unop_cos, - ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */ - ir_unop_cos_reduced, /**< Reduced range cos. 
[-pi, pi] */ /*@}*/ /** diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index ecebc3c..171b8e9 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -781,7 +781,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_sin: - case ir_unop_sin_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = sinf(op[0]->value.f[c]); @@ -789,7 +788,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_unop_cos: - case ir_unop_cos_reduced: assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { data.f[c] = cosf(op[0]->value.f[c]); diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 72c5f06..cfe0df3 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -334,8 +334,6 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_unop_sin: case ir_unop_cos: - case ir_unop_sin_reduced: - case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index f48a34b..80c5b3a 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -1087,12 +1087,6 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break; case ir_unop_sin: emit(nir_op_fsin, dest_size, srcs); break; case ir_unop_cos: emit(nir_op_fcos, dest_size, srcs); break; - case ir_unop_sin_reduced: - emit(nir_op_fsin_reduced, dest_size, srcs); - break; - case ir_unop_cos_reduced: - emit(nir_op_fcos_reduced, dest_size, srcs); - break; case ir_unop_dFdx: emit(nir_op_fddx, dest_size, srcs); break; case ir_unop_dFdy: emit(nir_op_fddy, dest_size, srcs); break; case ir_unop_dFdx_fine: emit(nir_op_fddx_fine, dest_size, srcs); 
break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 4049b09..d0f6122 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -232,8 +232,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: - case ir_unop_sin_reduced: - case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdx_coarse: case ir_unop_dFdx_fine: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index e6fb0cb..305972d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -817,11 +817,9 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_log: unreachable("not reached: should be handled by ir_explog_to_explog2"); case ir_unop_sin: - case ir_unop_sin_reduced: emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: - case ir_unop_cos_reduced: emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ca1a995..ffbe04d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1411,11 +1411,9 @@ vec4_visitor::visit(ir_expression *ir) case ir_unop_log: unreachable("not reached: should be handled by ir_explog_to_explog2"); case ir_unop_sin: - case ir_unop_sin_reduced: emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); break; case ir_unop_cos: - case ir_unop_cos_reduced: emit_math(SHADER_OPCODE_COS, result_dst, op[0]); break; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 39790ec..3a78dc4 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1122,12 +1122,6 @@ 
ir_to_mesa_visitor::visit(ir_expression *ir) case ir_unop_cos: emit_scalar(ir, OPCODE_COS, result_dst, op[0]); break; - case ir_unop_sin_reduced: - emit_scs(ir, OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos_reduced: - emit_scs(ir, OPCODE_COS, result_dst, op[0]); - break; case ir_unop_dFdx: emit(ir, OPCODE_DDX, result_dst, op[0]); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index e97ab83..96c1f35 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1611,12 +1611,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_cos: emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); break; - case ir_unop_sin_reduced: - emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos_reduced: - emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]); - break; case ir_unop_saturate: { glsl_to_tgsi_instruction *inst; inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]); From mattst88 at kemper.freedesktop.org Mon Apr 6 17:20:40 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 6 Apr 2015 10:20:40 -0700 (PDT) Subject: Mesa (master): program: Remove unused emit_scs(). Message-ID: <20150406172040.3AE2A76250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5fb735b75675bc4d6287f2a87c9baac65da5f405 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5fb735b75675bc4d6287f2a87c9baac65da5f405 Author: Matt Turner Date: Wed Apr 1 18:22:52 2015 -0700 program: Remove unused emit_scs(). Was only used by the sin_reduced/cos_reduced cases, which themselves were impossible to reach. 
Reviewed-by: Jason Ekstrand --- src/mesa/program/ir_to_mesa.cpp | 98 --------------------------------------- 1 file changed, 98 deletions(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 3a78dc4..3dcb537 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -303,9 +303,6 @@ public: void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0, src_reg src1); - void emit_scs(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, const src_reg &src); - bool try_emit_mad(ir_expression *ir, int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, @@ -479,101 +476,6 @@ ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, emit_scalar(ir, op, dst, src0, undef); } -/** - * Emit an OPCODE_SCS instruction - * - * The \c SCS opcode functions a bit differently than the other Mesa (or - * ARB_fragment_program) opcodes. Instead of splatting its result across all - * four components of the destination, it writes one value to the \c x - * component and another value to the \c y component. - * - * \param ir IR instruction being processed - * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which - * value is desired. - * \param dst Destination register - * \param src Source register - */ -void -ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, - const src_reg &src) -{ - /* Vertex programs cannot use the SCS opcode. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { - emit_scalar(ir, op, dst, src); - return; - } - - const unsigned component = (op == OPCODE_SIN) ? 0 : 1; - const unsigned scs_mask = (1U << component); - int done_mask = ~dst.writemask; - src_reg tmp; - - assert(op == OPCODE_SIN || op == OPCODE_COS); - - /* If there are compnents in the destination that differ from the component - * that will be written by the SCS instrution, we'll need a temporary. 
- */ - if (scs_mask != unsigned(dst.writemask)) { - tmp = get_temp(glsl_type::vec4_type); - } - - for (unsigned i = 0; i < 4; i++) { - unsigned this_mask = (1U << i); - src_reg src0 = src; - - if ((done_mask & this_mask) != 0) - continue; - - /* The source swizzle specified which component of the source generates - * sine / cosine for the current component in the destination. The SCS - * instruction requires that this value be swizzle to the X component. - * Replace the current swizzle with a swizzle that puts the source in - * the X component. - */ - unsigned src0_swiz = GET_SWZ(src.swizzle, i); - - src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, - src0_swiz, src0_swiz); - for (unsigned j = i + 1; j < 4; j++) { - /* If there is another enabled component in the destination that is - * derived from the same inputs, generate its value on this pass as - * well. - */ - if (!(done_mask & (1 << j)) && - GET_SWZ(src0.swizzle, j) == src0_swiz) { - this_mask |= (1 << j); - } - } - - if (this_mask != scs_mask) { - ir_to_mesa_instruction *inst; - dst_reg tmp_dst = dst_reg(tmp); - - /* Emit the SCS instruction. - */ - inst = emit(ir, OPCODE_SCS, tmp_dst, src0); - inst->dst.writemask = scs_mask; - - /* Move the result of the SCS instruction to the desired location in - * the destination. - */ - tmp.swizzle = MAKE_SWIZZLE4(component, component, - component, component); - inst = emit(ir, OPCODE_SCS, dst, tmp); - inst->dst.writemask = this_mask; - } else { - /* Emit the SCS instruction to write directly to the destination. - */ - ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); - inst->dst.writemask = scs_mask; - } - - done_mask |= this_mask; - } -} - src_reg ir_to_mesa_visitor::src_reg_for_float(float val) { From mattst88 at kemper.freedesktop.org Mon Apr 6 17:20:40 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 6 Apr 2015 10:20:40 -0700 (PDT) Subject: Mesa (master): i965/vec4: Remove emit_scs() prototype. 
Message-ID: <20150406172040.2EF367635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cdb1eb9a3fa096b0eeef239a602cd1c42cf27498 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cdb1eb9a3fa096b0eeef239a602cd1c42cf27498 Author: Matt Turner Date: Wed Apr 1 18:21:41 2015 -0700 i965/vec4: Remove emit_scs() prototype. This has never existed. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_vec4.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 6ec00d5..700ca69 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -314,9 +314,6 @@ public: void emit_scalar(ir_instruction *ir, enum prog_opcode op, dst_reg dst, src_reg src0, src_reg src1); - void emit_scs(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, const src_reg &src); - src_reg fix_3src_operand(src_reg src); void emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, From mattst88 at kemper.freedesktop.org Mon Apr 6 17:20:40 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 6 Apr 2015 10:20:40 -0700 (PDT) Subject: Mesa (master): st/mesa: Remove unused emit_scs(). Message-ID: <20150406172040.4893876250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c8d65dd7138ba5683be16e692882e41d6dae7bf8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8d65dd7138ba5683be16e692882e41d6dae7bf8 Author: Matt Turner Date: Wed Apr 1 18:23:58 2015 -0700 st/mesa: Remove unused emit_scs(). Was only used by the sin_reduced/cos_reduced cases, which themselves were impossible to reach. 
Reviewed-by: Jason Ekstrand --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 98 ---------------------------- 1 file changed, 98 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 96c1f35..67a4da7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -441,9 +441,6 @@ public: void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); - void emit_scs(ir_instruction *ir, unsigned op, - st_dst_reg dst, const st_src_reg &src); - bool try_emit_mad(ir_expression *ir, int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, @@ -966,101 +963,6 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, emit(NULL, op, dst, src0); } -/** - * Emit an TGSI_OPCODE_SCS instruction - * - * The \c SCS opcode functions a bit differently than the other TGSI opcodes. - * Instead of splatting its result across all four components of the - * destination, it writes one value to the \c x component and another value to - * the \c y component. - * - * \param ir IR instruction being processed - * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending - * on which value is desired. - * \param dst Destination register - * \param src Source register - */ -void -glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, - st_dst_reg dst, - const st_src_reg &src) -{ - /* Vertex programs cannot use the SCS opcode. - */ - if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) { - emit_scalar(ir, op, dst, src); - return; - } - - const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1; - const unsigned scs_mask = (1U << component); - int done_mask = ~dst.writemask; - st_src_reg tmp; - - assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS); - - /* If there are compnents in the destination that differ from the component - * that will be written by the SCS instrution, we'll need a temporary. 
- */ - if (scs_mask != unsigned(dst.writemask)) { - tmp = get_temp(glsl_type::vec4_type); - } - - for (unsigned i = 0; i < 4; i++) { - unsigned this_mask = (1U << i); - st_src_reg src0 = src; - - if ((done_mask & this_mask) != 0) - continue; - - /* The source swizzle specified which component of the source generates - * sine / cosine for the current component in the destination. The SCS - * instruction requires that this value be swizzle to the X component. - * Replace the current swizzle with a swizzle that puts the source in - * the X component. - */ - unsigned src0_swiz = GET_SWZ(src.swizzle, i); - - src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, - src0_swiz, src0_swiz); - for (unsigned j = i + 1; j < 4; j++) { - /* If there is another enabled component in the destination that is - * derived from the same inputs, generate its value on this pass as - * well. - */ - if (!(done_mask & (1 << j)) && - GET_SWZ(src0.swizzle, j) == src0_swiz) { - this_mask |= (1 << j); - } - } - - if (this_mask != scs_mask) { - glsl_to_tgsi_instruction *inst; - st_dst_reg tmp_dst = st_dst_reg(tmp); - - /* Emit the SCS instruction. - */ - inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0); - inst->dst[0].writemask = scs_mask; - - /* Move the result of the SCS instruction to the desired location in - * the destination. - */ - tmp.swizzle = MAKE_SWIZZLE4(component, component, - component, component); - inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp); - inst->dst[0].writemask = this_mask; - } else { - /* Emit the SCS instruction to write directly to the destination. 
- */ - glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0); - inst->dst[0].writemask = scs_mask; - } - - done_mask |= this_mask; - } -} - int glsl_to_tgsi_visitor::add_constant(gl_register_file file, gl_constant_value values[8], int size, int datatype, From mattst88 at kemper.freedesktop.org Mon Apr 6 17:20:40 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 6 Apr 2015 10:20:40 -0700 (PDT) Subject: Mesa (master): nir: Remove fsin_reduced/fcos_reduced. Message-ID: <20150406172040.551597635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d131630c0825f199768965c504b6fa1e593d03d5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d131630c0825f199768965c504b6fa1e593d03d5 Author: Matt Turner Date: Wed Apr 1 18:25:02 2015 -0700 nir: Remove fsin_reduced/fcos_reduced. Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_opcodes.py | 2 -- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index f96424f..264806f 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)") unop("fsin", tfloat, "sinf(src0)") unop("fcos", tfloat, "cosf(src0)") -unop("fsin_reduced", tfloat, "sinf(src0)") -unop("fcos_reduced", tfloat, "cosf(src0)") # Partial derivatives. 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index e9692aa..a874337 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -895,13 +895,11 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) unreachable("not reached: should be handled by ir_explog_to_explog2"); case nir_op_fsin: - case nir_op_fsin_reduced: inst = emit_math(SHADER_OPCODE_SIN, result, op[0]); inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - case nir_op_fcos_reduced: inst = emit_math(SHADER_OPCODE_COS, result, op[0]); inst->saturate = instr->dest.saturate; break; From idr at kemper.freedesktop.org Mon Apr 6 20:14:43 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Mon, 6 Apr 2015 13:14:43 -0700 (PDT) Subject: Mesa (master): glsl/cse: Maintain a list of free ae_entry objects Message-ID: <20150406201443.3D0E976250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dd7d0687848fb6c310debef4a6ff61b2159d2a4d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd7d0687848fb6c310debef4a6ff61b2159d2a4d Author: Ian Romanick Date: Wed Mar 25 17:23:40 2015 -0700 glsl/cse: Maintain a list of free ae_entry objects The CSE algorithm will continuously allocate new ae_entry objects. As each new basic block is exited, all of the previously allocated objects are dumped. Instead, put them in a free list and re-use them in the next basic block. Reduce, reuse, recycle! 
Signed-off-by: Ian Romanick Reviewed-by: Jordan Justen Reviewed-by: Thomas Helland --- src/glsl/opt_cse.cpp | 63 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp index b0b67f4..4b8e9a0 100644 --- a/src/glsl/opt_cse.cpp +++ b/src/glsl/opt_cse.cpp @@ -63,6 +63,17 @@ public: var = NULL; } + void init(ir_instruction *base_ir, ir_rvalue **val) + { + this->val = val; + this->base_ir = base_ir; + this->var = NULL; + + assert(val); + assert(*val); + assert(base_ir); + } + /** * The pointer to the expression that we might be able to reuse * @@ -116,6 +127,18 @@ private: ir_rvalue *try_cse(ir_rvalue *rvalue); void add_to_ae(ir_rvalue **rvalue); + /** + * Move all nodes from the ae list to the free list + */ + void empty_ae_list(); + + /** + * Get and initialize a new ae_entry + * + * This will either come from the free list or be freshly allocated. + */ + ae_entry *get_ae_entry(ir_rvalue **rvalue); + /** List of ae_entry: The available expressions to reuse */ exec_list *ae; @@ -126,6 +149,11 @@ private: * right. */ exec_list *validate_instructions; + + /** + * List of available-for-use ae_entry objects. + */ + exec_list free_ae_entries; }; /** @@ -322,6 +350,25 @@ cse_visitor::try_cse(ir_rvalue *rvalue) return NULL; } +void +cse_visitor::empty_ae_list() +{ + free_ae_entries.append_list(ae); +} + +ae_entry * +cse_visitor::get_ae_entry(ir_rvalue **rvalue) +{ + ae_entry *entry = (ae_entry *) free_ae_entries.pop_head(); + if (entry) { + entry->init(base_ir, rvalue); + } else { + entry = new(mem_ctx) ae_entry(base_ir, rvalue); + } + + return entry; +} + /** Add the rvalue to the list of available expressions for CSE. 
*/ void cse_visitor::add_to_ae(ir_rvalue **rvalue) @@ -332,7 +379,7 @@ cse_visitor::add_to_ae(ir_rvalue **rvalue) printf("\n"); } - ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue)); + ae->push_tail(get_ae_entry(rvalue)); if (debug) dump_ae(ae); @@ -370,33 +417,33 @@ cse_visitor::visit_enter(ir_if *ir) { handle_rvalue(&ir->condition); - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->then_instructions); - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->else_instructions); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } ir_visitor_status cse_visitor::visit_enter(ir_function_signature *ir) { - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->body); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } ir_visitor_status cse_visitor::visit_enter(ir_loop *ir) { - ae->make_empty(); + empty_ae_list(); visit_list_elements(this, &ir->body_instructions); - ae->make_empty(); + empty_ae_list(); return visit_continue_with_parent; } From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965/fp: Set coord_components correctly for cube textures. Message-ID: <20150406213548.8305476250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 62050886c85b7b3b6c94e3c2363966b4f14df0bd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62050886c85b7b3b6c94e3c2363966b4f14df0bd Author: Kenneth Graunke Date: Fri Apr 3 18:30:57 2015 -0700 i965/fp: Set coord_components correctly for cube textures. I've no idea why this was 4. It certainly seems wrong. Prevents assertion failures in fp-incomplete-tex with some upcoming patches of mine. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp index c4064da..21d677a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -448,7 +448,7 @@ fs_visitor::emit_fragment_program_code() break; case TEXTURE_CUBE_INDEX: { - coord_components = 4; + coord_components = 3; fs_reg temp = vgrf(glsl_type::float_type); fs_reg cubecoord = vgrf(glsl_type::vec3_type); From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Use SIMD16 instead of SIMD8 on Gen4 when possible. Message-ID: <20150406213548.9DA7376250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8aee87fe4cce0a883867df3546db0e0a36908086 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8aee87fe4cce0a883867df3546db0e0a36908086 Author: Kenneth Graunke Date: Fri Feb 20 14:09:17 2015 -0800 i965: Use SIMD16 instead of SIMD8 on Gen4 when possible. Gen5+ systems allow you to specify multiple shader programs - both SIMD8 and SIMD16 - and the hardware will automatically dispatch to the most appropriate one, given the number of subspans to be processed. However, that is not the case on Gen4. Instead, you program a single shader. If you enable multiple dispatch modes (SIMD8 and SIMD16), the shader is supposed to contain a series of jump instructions at the beginning. The hardware will launch the shader at a small offset, hitting one of the jumps. We've always thought that sounds like a pain, and weren't clear how it affected performance - is it worth having multiple shader types? So, we never bothered with SIMD16 until now. This patch takes a simpler approach: try and compile a SIMD16 shader. 
If possible, set the no_8 flag, telling the hardware to just use the SIMD16 variant all the time. Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 1f8febc..5c475b2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -4030,8 +4030,7 @@ brw_wm_fs_emit(struct brw_context *brw, cfg_t *simd16_cfg = NULL; fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16); - if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16) || - brw->use_rep_send)) { + if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ v2.import_uniforms(&v); @@ -4049,7 +4048,7 @@ brw_wm_fs_emit(struct brw_context *brw, cfg_t *simd8_cfg; int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8; - if (no_simd8 && simd16_cfg) { + if ((no_simd8 || brw->gen < 5) && simd16_cfg) { simd8_cfg = NULL; prog_data->no_8 = true; } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 305972d..c538dcf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1433,6 +1433,8 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, bool simd16 = false; fs_reg orig_dst; + no16("SIMD16 texturing on Gen4 not supported yet."); + /* g0 header. */ mlen = 1; From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Implement SIMD16 texturing on Gen4. 
Message-ID: <20150406213548.AD81076250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 797d606127c131a6ccff28150495d2b1f3f7e46e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=797d606127c131a6ccff28150495d2b1f3f7e46e Author: Kenneth Graunke Date: Fri Feb 20 15:11:49 2015 -0800 i965: Implement SIMD16 texturing on Gen4. This allows SIMD16 mode to work for a lot more programs. Texturing is also more efficient in SIMD16 mode than SIMD8. Several messages don't actually exist in SIMD8 mode, so we did SIMD16 messages and threw away half of the data. Now we compute real data in both halves. Also, unlike the SIMD8 one, the SIMD16 "sample" message doesn't require all three coordinate components to exist, so we can shorten the message lengths, cutting register usage a bit. I chose to implement the visitor functionality in a separate function, since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks seemed like a mess. The new code bails on a few cases where we'd have to do two SIMD8 messages - we just fall back to SIMD8 for now. Improves performance in "Shadowrun: Dragonfall - Director's Cut" by about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around in the first mission). v2: Add ir_txf to the has_lod case (caught by Jordan Justen). 
Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 +++++++--- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 68 +++++++++++++++++++++++- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 278a8ee..cfdbf55 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -271,6 +271,10 @@ public: fs_reg shadow_comp, fs_reg lod, fs_reg lod2, int grad_components, uint32_t sampler); + fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int vector_elements, + fs_reg shadow_c, fs_reg lod, + uint32_t sampler); fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst, fs_reg coordinate, int coord_components, fs_reg shadow_comp, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 40e51aa..2743297 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Note that G45 and older determines shadow compare and dispatch width * from message length for most messages. 
*/ - assert(dispatch_width == 8); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - if (inst->shadow_compare) { - assert(inst->mlen == 6); - } else { - assert(inst->mlen <= 4); - } + if (dispatch_width == 8) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + if (inst->shadow_compare) { + assert(inst->mlen == 6); + } else { + assert(inst->mlen <= 4); + } + } else { + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + assert(inst->mlen == 9); + } else { + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + assert(inst->mlen <= 7 && inst->mlen % 2 == 1); + } + } break; case FS_OPCODE_TXB: if (inst->shadow_compare) { + assert(dispatch_width == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXL: if (inst->shadow_compare) { + assert(dispatch_width == 8); assert(inst->mlen == 6); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; } else { @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ + assert(dispatch_width == 8); assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; case SHADER_OPCODE_TXF: - assert(inst->mlen == 9); + assert(inst->mlen <= 9 && inst->mlen % 2 == 1); msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c538dcf..3622e65 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1433,8 +1433,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, bool simd16 = false; fs_reg orig_dst; - no16("SIMD16 texturing on Gen4 not supported 
yet."); - /* g0 header. */ mlen = 1; @@ -1586,6 +1584,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, return inst; } +fs_inst * +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, + fs_reg coordinate, int vector_elements, + fs_reg shadow_c, fs_reg lod, + uint32_t sampler) +{ + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); + bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf; + + if (has_lod && shadow_c.file != BAD_FILE) + no16("TXB and TXL with shadow comparison unsupported in SIMD16."); + + if (op == ir_txd) + no16("textureGrad unsupported in SIMD16."); + + /* Copy the coordinates. */ + for (int i = 0; i < vector_elements; i++) { + emit(MOV(retype(offset(message, i), coordinate.type), coordinate)); + coordinate = offset(coordinate, 1); + } + + fs_reg msg_end = offset(message, vector_elements); + + /* Messages other than sample and ld require all three components */ + if (has_lod || shadow_c.file != BAD_FILE) { + for (int i = vector_elements; i < 3; i++) { + emit(MOV(offset(message, i), fs_reg(0.0f))); + } + } + + if (has_lod) { + fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ? 
+ BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F); + emit(MOV(msg_lod, lod)); + msg_end = offset(msg_lod, 1); + } + + if (shadow_c.file != BAD_FILE) { + fs_reg msg_ref = offset(message, 3 + has_lod); + emit(MOV(msg_ref, shadow_c)); + msg_end = offset(msg_ref, 1); + } + + enum opcode opcode; + switch (op) { + case ir_tex: opcode = SHADER_OPCODE_TEX; break; + case ir_txb: opcode = FS_OPCODE_TXB; break; + case ir_txd: opcode = SHADER_OPCODE_TXD; break; + case ir_txl: opcode = SHADER_OPCODE_TXL; break; + case ir_txs: opcode = SHADER_OPCODE_TXS; break; + case ir_txf: opcode = SHADER_OPCODE_TXF; break; + default: unreachable("not reached"); + } + + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler)); + inst->base_mrf = message.reg - 1; + inst->mlen = msg_end.reg - inst->base_mrf; + inst->header_present = true; + inst->regs_written = 8; + + return inst; +} + /* gen5's sampler has slots for u, v, r, array index, then optional * parameters like shadow comparitor or LOD bias. If optional * parameters aren't present, those base slots are optional and don't @@ -2148,6 +2209,9 @@ fs_visitor::emit_texture(ir_texture_opcode op, shadow_c, lod, lod2, grad_components, sample_index, sampler, offset_value.file != BAD_FILE); + } else if (dispatch_width == 16) { + inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components, + shadow_c, lod, sampler); } else { inst = emit_texture_gen4(op, dst, coordinate, coord_components, shadow_c, lod, lod2, grad_components, From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Free dead GLSL IR one last time. 
Message-ID: <20150406213548.B98F476250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a09c5b8527c2b28d30c0b11111a66fc7d283c06f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a09c5b8527c2b28d30c0b11111a66fc7d283c06f Author: Kenneth Graunke Date: Thu Apr 2 00:55:45 2015 -0700 i965: Free dead GLSL IR one last time. While working on NIR's memory allocation model, I realized the GLSL IR memory model was broken. During glCompileShader, we allocate everything out of the _mesa_glsl_parse_state context, and reparent it to gl_shader at the end. During glLinkProgram, we allocate everything out of a temporary context, then reparent it to the exec_list containing the linked IR. But during brw_link_shader - the driver's final opportunity to do lowering and optimization - we just allocated everything out of the permanent context given to us by the linker. That memory stayed forever. Notably, passes like brw_fs_channel_expressions cause us to churn the majority of the code, so we really want to free dead IR here. Saves 125MB of memory when replaying a Dota 2 trace on Broadwell. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_shader.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 4ef3882..54d6d71 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -144,6 +144,11 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); + /* Temporary memory context for any new IR. 
*/ + void *mem_ctx = ralloc_context(NULL); + + ralloc_adopt(mem_ctx, shader->base.ir); + bool progress; /* lower_packing_builtins() inserts arithmetic instructions, so it @@ -250,6 +255,13 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_reference_program(ctx, &prog, NULL); + /* Now that we've finished altering the linked IR, reparent any live IR back + * to the permanent memory context, and free the temporary one (discarding any + * junk we optimized away). + */ + reparent_ir(shader->base.ir, shader->base.ir); + ralloc_free(mem_ctx); + if (ctx->_Shader->Flags & GLSL_DUMP) { fprintf(stderr, "\n"); fprintf(stderr, "GLSL IR for linked %s program %d:\n", From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Fix instanced geometry shaders on Gen8+. Message-ID: <20150406213548.C4DB476250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f9e5dc0a85df8dbfb8213ff772dfeb218972db12 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9e5dc0a85df8dbfb8213ff772dfeb218972db12 Author: Kenneth Graunke Date: Sat Apr 4 02:00:52 2015 -0700 i965: Fix instanced geometry shaders on Gen8+. Jordan added this in commit 741782b5948bb3d01d699f062a37513c2e73b076 for Gen7 platforms. I missed this when adding the Broadwell code. Fixes Piglit's spec/arb_gpu_shader5/invocation-id-{basic,in-separate-gs} with MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5 set. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen Reviewed-by: Chris Forbes Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 95cc123..46b9713 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -82,6 +82,8 @@ gen8_upload_gs_state(struct brw_context *brw) uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | brw->gs.prog_data->dispatch_mode | + ((brw->gs.prog_data->invocations - 1) << + GEN7_GS_INSTANCE_CONTROL_SHIFT) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Add forgotten multi-stream code to Gen8 SOL state. Message-ID: <20150406213548.D004776250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f368d0fa1fe37a58780ee555d4a9ccf15474782b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f368d0fa1fe37a58780ee555d4a9ccf15474782b Author: Kenneth Graunke Date: Thu Mar 26 17:21:10 2015 -0700 i965: Add forgotten multi-stream code to Gen8 SOL state. Fixes Piglit's arb_gpu_shader5-xfb-streams-without-invocations. 
Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/gen8_sol_state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index 1f122ec..d98a226 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -128,6 +128,15 @@ gen8_upload_3dstate_streamout(struct brw_context *brw, bool active, dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT; dw2 |= (urb_entry_read_length - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 |= urb_entry_read_offset << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT; + + dw2 |= urb_entry_read_offset << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT; + + dw2 |= urb_entry_read_offset << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT; + /* Set buffer pitches; 0 means unbound. */ if (xfb_obj->Buffers[0]) dw3 |= linked_xfb_info->BufferStride[0] * 4; From kwg at kemper.freedesktop.org Mon Apr 6 21:35:48 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 14:35:48 -0700 (PDT) Subject: Mesa (master): i965: Respect the no_8 flag on Gen4-5. Message-ID: <20150406213548.8C2BB7635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 108b92b1e9f645e9d2ff33b24648f5d089cb89c9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=108b92b1e9f645e9d2ff33b24648f5d089cb89c9 Author: Kenneth Graunke Date: Sat Jan 10 13:31:48 2015 -0800 i965: Respect the no_8 flag on Gen4-5. This flag means to ignore the SIMD8 program and only use the SIMD16 one. It was originally meant for repdata clear shaders, but I plan to use it for other things on Gen4 as well. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_wm_state.c | 55 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index afb4ebf..28a4aa4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -95,23 +95,40 @@ brw_upload_wm_unit(struct brw_context *brw) } /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */ - wm->thread0.grf_reg_count = prog_data->reg_blocks; - wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_16; - - wm->thread0.kernel_start_pointer = - brw_program_reloc(brw, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.base.prog_offset + - (wm->thread0.grf_reg_count << 1)) >> 6; - - wm->wm9.kernel_start_pointer_2 = - brw_program_reloc(brw, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, wm9), - brw->wm.base.prog_offset + - prog_data->prog_offset_16 + - (wm->wm9.grf_reg_count_2 << 1)) >> 6; + if (prog_data->no_8) { + wm->wm5.enable_16_pix = 1; + wm->thread0.grf_reg_count = prog_data->reg_blocks_16; + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.base.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.base.prog_offset + + prog_data->prog_offset_16 + + (prog_data->reg_blocks_16 << 1)) >> 6; + + } else { + wm->thread0.grf_reg_count = prog_data->reg_blocks; + wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_16; + + wm->wm5.enable_8_pix = 1; + if (prog_data->prog_offset_16) + wm->wm5.enable_16_pix = 1; + + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.base.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.base.prog_offset + + (wm->thread0.grf_reg_count << 1)) >> 6; + + wm->wm9.kernel_start_pointer_2 = + brw_program_reloc(brw, + brw->wm.base.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + 
brw->wm.base.prog_offset + + prog_data->prog_offset_16 + + (wm->wm9.grf_reg_count_2 << 1)) >> 6; + } wm->thread1.depth_coef_urb_read_offset = 1; if (prog_data->base.use_alt_mode) @@ -172,10 +189,6 @@ brw_upload_wm_unit(struct brw_context *brw) wm->wm5.program_uses_killpixel = prog_data->uses_kill || ctx->Color.AlphaEnabled; - wm->wm5.enable_8_pix = 1; - if (prog_data->prog_offset_16) - wm->wm5.enable_16_pix = 1; - wm->wm5.max_threads = brw->max_wm_threads - 1; /* _NEW_BUFFERS | _NEW_COLOR */ From kwg at kemper.freedesktop.org Mon Apr 6 23:06:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 6 Apr 2015 16:06:14 -0700 (PDT) Subject: Mesa (master): i965: Do not render primitives in non-zero streams when TF is disabled Message-ID: <20150406230614.19A6776250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2042a2f961a07e04eaca0347e42859c249325531 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2042a2f961a07e04eaca0347e42859c249325531 Author: Iago Toral Quiroga Date: Mon Mar 9 15:17:03 2015 +0100 i965: Do not render primitives in non-zero streams when TF is disabled Haswell hardware seems to ignore Render Stream Select bits from 3DSTATE_STREAMOUT packet when the SOL stage is disabled even if the PRM says otherwise. Because of this, all primitives are sent down the pipeline for rasterization, which is wrong. If SOL is enabled, Render Stream Select is honored and primitives bound to non-zero streams are discarded after stream output. Since the only purpose of primitives sent to non-zero streams is to be recorded by transform feedback, we can simply discard all geometry bound to non-zero streams when transform feedback is disabled to prevent it from ever reaching the rasterization stage. 
Notice that this patch introduces a small change in the behavior we get when a geometry shader emits more vertices than the maximum declared: before, a vertex that was emitted to a non-zero stream when TF was disabled would still count for the purposes of checking that we don't exceed the maximum number of output vertices declared by the shader. With this change, these vertices are completely ignored and won't increase the output vertex count, making more room for other (hopefully more useful) vertices. Fixes piglit test arb_gpu_shader5-emitstreamvertex_nodraw on Haswell and Broadwell. v2 (Ken): Drop is_haswell check in favor of doing this unconditionally. Broadwell needs the workaround as well, and it doesn't hurt to do it in general. Also tweak comments - the Haswell PRM does actually mention this ("Command Reference: Instructions" page 797). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83962 Reviewed-by: Kenneth Graunke Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 2002ffd..18a44be 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -476,6 +476,19 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) { this->current_annotation = "emit vertex: safety check"; + /* Haswell and later hardware ignores the "Render Stream Select" bits + * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled, + * and instead sends all primitives down the pipeline for rasterization. + * If the SOL stage is enabled, "Render Stream Select" is honored and + * primitives bound to non-zero streams are discarded after stream output. 
+ * + * Since the only purpose of primives sent to non-zero streams is to + * be recorded by transform feedback, we can simply discard all geometry + * bound to these streams when transform feedback is disabled. + */ + if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0) + return; + /* To ensure that we don't output more vertices than the shader specified * using max_vertices, do the logic inside a conditional of the form "if * (vertex_count < MAX)" From airlied at kemper.freedesktop.org Tue Apr 7 01:40:59 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Mon, 6 Apr 2015 18:40:59 -0700 (PDT) Subject: Mesa (master): r600g: fix op3 abs issue Message-ID: <20150407014059.797CB76250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ad84689f737edefe549688f9b36d66027ca3fcb2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad84689f737edefe549688f9b36d66027ca3fcb2 Author: Dave Airlie Date: Tue Mar 31 15:18:47 2015 +1000 r600g: fix op3 abs issue This code to handle absolute values on op3 srcs was a bit too simple, it really needs a temp reg per src, not one per channel, make it easier and let sb clean up the mess. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89831 Reviewed-by: Glenn Kennard Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 51 +++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 28b290a..ec75400 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -4864,10 +4864,9 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru } static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, - unsigned temp, int temp_chan, - struct r600_bytecode_alu_src *bc_src, - const struct r600_shader_src *shader_src, - unsigned chan) + unsigned temp, int chan, + struct r600_bytecode_alu_src *bc_src, + const struct r600_shader_src *shader_src) { struct r600_bytecode_alu alu; int r; @@ -4880,7 +4879,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.dst.sel = temp; - alu.dst.chan = temp_chan; + alu.dst.chan = chan; alu.dst.write = 1; alu.src[0] = *bc_src; @@ -4891,7 +4890,7 @@ static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, memset(bc_src, 0, sizeof(*bc_src)); bc_src->sel = temp; - bc_src->chan = temp_chan; + bc_src->chan = chan; } return 0; } @@ -4902,7 +4901,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + int temp_regs[4]; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + temp_regs[j] = 0; + if (ctx->src[j].abs) + temp_regs[j] = r600_get_temp(ctx); + } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -4910,7 +4915,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ctx->inst_info->op; for (j = 0; j < 
inst->Instruction.NumSrcRegs; j++) { - r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, j, &alu.src[j], &ctx->src[j], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]); if (r) return r; } @@ -6003,7 +6008,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - unsigned i, extra_temp; + unsigned i, temp_regs[2]; int r; /* optimize if it's just an equal balance */ @@ -6073,10 +6078,15 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) } /* src0 * src1 + (1 - src0) * src2 */ - if (ctx->src[0].abs || ctx->src[1].abs) /* XXX avoid dupliating condition */ - extra_temp = r600_get_temp(ctx); + if (ctx->src[0].abs) + temp_regs[0] = r600_get_temp(ctx); + else + temp_regs[0] = 0; + if (ctx->src[1].abs) + temp_regs[1] = r600_get_temp(ctx); else - extra_temp = 0; + temp_regs[1] = 0; + for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -6084,10 +6094,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; - r = tgsi_make_src_for_op3(ctx, extra_temp, 0, &alu.src[0], &ctx->src[0], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, extra_temp, 1, &alu.src[1], &ctx->src[1], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); if (r) return r; alu.src[2].sel = ctx->temp_reg; @@ -6109,8 +6119,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; - int i, r; + int i, r, j; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + int temp_regs[3]; + + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + temp_regs[j] = 0; + 
if (ctx->src[j].abs) + temp_regs[j] = r600_get_temp(ctx); + } for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) @@ -6118,13 +6135,13 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_CNDGE; - r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 0, &alu.src[0], &ctx->src[0], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 1, &alu.src[1], &ctx->src[2], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]); if (r) return r; - r = tgsi_make_src_for_op3(ctx, ctx->temp_reg, 2, &alu.src[2], &ctx->src[1], i); + r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]); if (r) return r; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); From imirkin at kemper.freedesktop.org Tue Apr 7 03:11:56 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 6 Apr 2015 20:11:56 -0700 (PDT) Subject: Mesa (master): nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling Message-ID: <20150407031156.2624276250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ae720c66cb91c2640dfd6707446899694a24ab5b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae720c66cb91c2640dfd6707446899694a24ab5b Author: Ilia Mirkin Date: Sun Apr 5 17:40:44 2015 -0400 nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling We limit y-tiling to 0x20 when depth is involved. However the function is run for each miplevel, and the hardware expects miplevel 0 to have the highest tiling settings. Perform the y-tiling limit on all levels of a 3d texture, not just the ones that have depth. 
Fixes: texelFetch fs sampler3D 98x129x1-98x129x9 Signed-off-by: Ilia Mirkin Tested-by: Nick Tenney # GT216 Cc: "10.4 10.5" --- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 14 ++++++++------ src/gallium/drivers/nouveau/nv50/nv50_resource.h | 3 ++- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 2e41091..744a3a5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -29,7 +29,8 @@ #include "nv50/nv50_resource.h" uint32_t -nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz, + boolean is_3d) { uint32_t tile_mode = 0x000; @@ -41,7 +42,7 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) else if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ - if (nz == 1) + if (!is_3d) return tile_mode; else if (tile_mode > 0x020) @@ -52,14 +53,15 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + if (nz > 1) return tile_mode | 0x100; /* depth 2 tiles */ - return tile_mode | 0x100; + return tile_mode; } static uint32_t -nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d) { - return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz); + return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d); } static uint32_t @@ -304,7 +306,7 @@ nv50_miptree_init_layout_tiled(struct nv50_miptree *mt) lvl->offset = mt->total_size; - lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d); + lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d, 
mt->layout_3d); tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h index c06daa3..36d70d8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h @@ -34,7 +34,8 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen); #endif /* __NVC0_RESOURCE_H__ */ uint32_t -nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz); +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz, + boolean is_3d); struct nv50_miptree_level { uint32_t offset; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 1beda7d..fc75fc6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -29,9 +29,9 @@ #include "nvc0/nvc0_resource.h" static uint32_t -nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d) { - return nv50_tex_choose_tile_dims_helper(nx, ny, nz); + return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d); } static uint32_t @@ -211,7 +211,7 @@ nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt) lvl->offset = mt->total_size; - lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d); + lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d); tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode); From tpalli at kemper.freedesktop.org Tue Apr 7 05:11:42 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Mon, 6 Apr 2015 22:11:42 -0700 (PDT) Subject: Mesa (master): glsl: relax input->output validation for SSO programs Message-ID: <20150407051142.C1BBC76250@kemper.freedesktop.org> Module: Mesa Branch: 
master Commit: 1aa5738e666a9534c7e5b46f077327e6d647c64f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1aa5738e666a9534c7e5b46f077327e6d647c64f Author: Tapani Pälli Date: Thu Apr 2 12:59:55 2015 +0300 glsl: relax input->output validation for SSO programs Commit 18004c3 introduced more restrictive validation to the linker between inputs and outputs. This patch skips the additional check for programs that utilize GL_ARB_separate_shader_objects, where inputs and outputs might not match exactly during linking but only when constructing the final pipeline. The stricter validation made some of the GL_ARB_program_interface_query test shaders fail to link; these tests can be used to verify the change. Signed-off-by: Tapani Pälli Reviewed-by: Anuj Phogat --- src/glsl/link_varyings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index d6fb1ea..605748a 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -270,7 +270,7 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, */ assert(!input->data.assigned); if (input->data.used && !input->get_interface_type() && - !input->data.explicit_location) + !input->data.explicit_location && !prog->SeparateShader) linker_error(prog, "%s shader input `%s' " "has no matching output in the previous stage\n", From jekstrand at kemper.freedesktop.org Tue Apr 7 18:50:46 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Tue, 7 Apr 2015 11:50:46 -0700 (PDT) Subject: Mesa (master): nir/lower_tex_projector: Don't use designated initializers Message-ID: <20150407185046.C9CBD762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2e3b35a1cbe9e1c187b9a284c556821795ebaeea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e3b35a1cbe9e1c187b9a284c556821795ebaeea Author: Jason Ekstrand Date: Mon Apr 6 21:04:42 2015 -0700 nir/lower_tex_projector: Don't use designated initializers These don't work in MSVC or in older 
versions of GCC Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89899 Reviewed-by: Mark Janes --- src/glsl/nir/nir_lower_tex_projector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_lower_tex_projector.c b/src/glsl/nir/nir_lower_tex_projector.c index 6327b23..6b0e9c3 100644 --- a/src/glsl/nir/nir_lower_tex_projector.c +++ b/src/glsl/nir/nir_lower_tex_projector.c @@ -109,7 +109,8 @@ nir_lower_tex_projector_block(nir_block *block, void *void_state) /* Now move the later tex sources down the array so that the projector * disappears. */ - nir_src dead = {.is_ssa = false, .ssa = NULL}; + nir_src dead; + memset(&dead, 0, sizeof dead); nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead); memmove(&tex->src[proj_index], &tex->src[proj_index + 1], From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): i965: Add the ability to render to I8/L8 and I16/L16 UNORM formats. Message-ID: <20150407213538.A86D97633A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 73d106822e3d0e851ef1308afff7490007b1af8c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=73d106822e3d0e851ef1308afff7490007b1af8c Author: Kenneth Graunke Date: Sun Mar 29 03:45:16 2015 -0700 i965: Add the ability to render to I8/L8 and I16/L16 UNORM formats. This allows those formats to work with the meta PBO upload path. 
Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 7261c01..7524ad9 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -582,6 +582,14 @@ brw_init_surface_formats(struct brw_context *brw) case BRW_SURFACEFORMAT_L16_FLOAT: render = BRW_SURFACEFORMAT_R16_FLOAT; break; + case BRW_SURFACEFORMAT_I8_UNORM: + case BRW_SURFACEFORMAT_L8_UNORM: + render = BRW_SURFACEFORMAT_R8_UNORM; + break; + case BRW_SURFACEFORMAT_I16_UNORM: + case BRW_SURFACEFORMAT_L16_UNORM: + render = BRW_SURFACEFORMAT_R16_UNORM; + break; case BRW_SURFACEFORMAT_B8G8R8X8_UNORM: /* XRGB is handled as ARGB because the chips in this family * cannot render to XRGB targets. This means that we have to From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): nir: Allocate nir_call_instr::params out of the nir_call itself. Message-ID: <20150407213538.B14637635A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b05d53404ccfe28b0011e93c388e5e5b8beaf6db URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b05d53404ccfe28b0011e93c388e5e5b8beaf6db Author: Kenneth Graunke Date: Tue Apr 7 00:32:55 2015 -0700 nir: Allocate nir_call_instr::params out of the nir_call itself. The lifetime of the params array needs to match that of the nir_call_instr itself. So, allocate it using the instruction itself as the context. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 5f86eca..0f807dd 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -445,7 +445,7 @@ nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) instr->callee = callee; instr->num_params = callee->num_params; - instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params); + instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); instr->return_deref = NULL; return instr; From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): i965: Use SET_FIELD in 3DSTATE_STREAMOUT packets. Message-ID: <20150407213538.A2367762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 60dcd972574b11fa5f29ceb173d4b3ae430e618d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=60dcd972574b11fa5f29ceb173d4b3ae430e618d Author: Kenneth Graunke Date: Mon Apr 6 16:09:35 2015 -0700 i965: Use SET_FIELD in 3DSTATE_STREAMOUT packets. Suggested by Topi Pohjolainen. Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/gen7_sol_state.c | 16 ++++++++-------- src/mesa/drivers/dri/i965/gen8_sol_state.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 7e9b285..3f99df9 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -245,17 +245,17 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, * point by reading less and offsetting the register index in the * SO_DECLs. 
*/ - dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH); } BEGIN_BATCH(3); diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index d98a226..58ead68 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -125,17 +125,17 @@ gen8_upload_3dstate_streamout(struct brw_context *brw, bool active, * point by reading less and offsetting the register index in the * SO_DECLs. 
*/ - dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH); - dw2 |= urb_entry_read_offset << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH); /* Set buffer pitches; 0 means unbound. */ if (xfb_obj->Buffers[0]) From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): nir: Allocate nir_phi_src values out of the nir_phi_instr. Message-ID: <20150407213538.C0833762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 900498bd111091dfda79d5ca6d84fffd427a866d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=900498bd111091dfda79d5ca6d84fffd427a866d Author: Kenneth Graunke Date: Thu Apr 2 16:15:11 2015 -0700 nir: Allocate nir_phi_src values out of the nir_phi_instr. Phi sources are part of the phi instruction and should have the same lifetime. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_lower_phis_to_scalar.c | 2 +- src/glsl/nir/nir_lower_vars_to_ssa.c | 2 +- src/glsl/nir/nir_to_ssa.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index 7cd93ea..4bdb800 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state) else nir_instr_insert_after_block(src->pred, &mov->instr); - nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *new_src = ralloc(new_phi, nir_phi_src); new_src->pred = src->pred; new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index 86e6ab4..2ca74d7 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred, struct deref_node *node = entry->data; - nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(phi, nir_phi_src); src->pred = pred; src->src.is_ssa = true; src->src.ssa = get_ssa_def_for_block(node, pred, state); diff --git a/src/glsl/nir/nir_to_ssa.c b/src/glsl/nir/nir_to_ssa.c index 47cf453..53ff547 100644 --- a/src/glsl/nir/nir_to_ssa.c +++ b/src/glsl/nir/nir_to_ssa.c @@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx) set_foreach(block->predecessors, entry) { nir_block *pred = (nir_block *) entry->key; - nir_phi_src *src = ralloc(mem_ctx, nir_phi_src); + nir_phi_src *src = ralloc(instr, nir_phi_src); src->pred = pred; src->src.is_ssa = false; src->src.reg.base_offset = 0; From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): 
nir: Allocate nir_ssa_def::uses/if_uses out of the instruction. Message-ID: <20150407213538.CC434762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4f4b04b7c7ee1ce27da990190a740473db0f2ecb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4f4b04b7c7ee1ce27da990190a740473db0f2ecb Author: Kenneth Graunke Date: Sat Mar 28 10:02:17 2015 -0700 nir: Allocate nir_ssa_def::uses/if_uses out of the instruction. We can't allocate them out of the nir_ssa_def itself, because it may not be ralloc'd (for example, nir_dest embeds a nir_ssa_def). However, allocating them out of the instruction should work. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 0f807dd..85ff0f4 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1834,13 +1834,11 @@ void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name) { - void *mem_ctx = ralloc_parent(instr); - def->name = name; def->parent_instr = instr; - def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); - def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer, + def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer, _mesa_key_pointer_equal); def->num_components = num_components; From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): nir: Implement a nir_sweep() pass. Message-ID: <20150407213538.E166E762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a10d493715cc3669a6e7647c5e514a386de886c0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a10d493715cc3669a6e7647c5e514a386de886c0 Author: Kenneth Graunke Date: Fri Mar 27 19:50:29 2015 -0700 nir: Implement a nir_sweep() pass.
This pass performs a mark and sweep pass over a nir_shader's associated memory - anything still connected to the program will be kept, and any dead memory we dropped on the floor will be freed. The expectation is that this will be called when finished building and optimizing the shader. However, it's also fine to call it earlier, and many times, to free up memory earlier. v2: (feedback from Jason Ekstrand) - Skip sweeping impl->start_block, as it's already in the CF list. - Don't sweep SSA defs (they're owned by their defining instruction) - Don't steal phi sources (they're owned by nir_phi_instr). - Don't steal tex->src (it's owned by the tex_inst itself) - Don't sweep dereference chains (top-level dereferences are owned by the instruction; sub-dereferences are owned by the parent deref). - Don't sweep sources and destinations (SSA defs are handled as part of the defining instruction, and registers are handled as part of function implementations). - Just steal instructions; don't walk them (no longer required). v3: (feedback from Jason Ekstrand) - Steal indirect sources from nir_src/nir_dest. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_sweep.c | 172 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 9bdcb80..c471eca 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -59,6 +59,7 @@ NIR_FILES = \ nir/nir_search.c \ nir/nir_search.h \ nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ nir/nir_to_ssa.c \ nir/nir_types.h \ nir/nir_validate.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e6b7684..0f72301 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1650,6 +1650,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader); bool nir_opt_remove_phis(nir_shader *shader); +void nir_sweep(nir_shader *shader); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c new file mode 100644 index 0000000..d354975 --- /dev/null +++ b/src/glsl/nir/nir_sweep.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * \file nir_sweep.c + * + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated + * memory - anything still connected to the program will be kept, and any dead memory + * we dropped on the floor will be freed. + * + * The expectation is that drivers should call this when finished compiling the shader + * (after any optimization, lowering, and so on). However, it's also fine to call it + * earlier, and even many times, trading CPU cycles for memory savings. + */ + +#define steal_list(mem_ctx, type, list) \ + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } + +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); + +static bool +sweep_src_indirect(nir_src *src, void *nir) +{ + if (!src->is_ssa && src->reg.indirect) + ralloc_steal(nir, src->reg.indirect); + + return true; +} + +static bool +sweep_dest_indirect(nir_dest *dest, void *nir) +{ + if (!dest->is_ssa && dest->reg.indirect) + ralloc_steal(nir, dest->reg.indirect); + + return true; +} + +static void +sweep_block(nir_shader *nir, nir_block *block) +{ + ralloc_steal(nir, block); + + nir_foreach_instr(block, instr) { + ralloc_steal(nir, instr); + + nir_foreach_src(instr, sweep_src_indirect, nir); + nir_foreach_dest(instr, sweep_dest_indirect, nir); + } +} + +static void +sweep_if(nir_shader *nir, nir_if *iff) +{ + ralloc_steal(nir, iff); + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { + sweep_cf_node(nir, cf_node); + } + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_loop(nir_shader *nir, nir_loop *loop) +{ + ralloc_steal(nir, loop); + + 
foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) +{ + switch (cf_node->type) { + case nir_cf_node_block: + sweep_block(nir, nir_cf_node_as_block(cf_node)); + break; + case nir_cf_node_if: + sweep_if(nir, nir_cf_node_as_if(cf_node)); + break; + case nir_cf_node_loop: + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); + break; + default: + unreachable("Invalid CF node type"); + } +} + +static void +sweep_impl(nir_shader *nir, nir_function_impl *impl) +{ + ralloc_steal(nir, impl); + + ralloc_steal(nir, impl->params); + ralloc_steal(nir, impl->return_var); + steal_list(nir, nir_variable, &impl->locals); + steal_list(nir, nir_register, &impl->registers); + + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { + sweep_cf_node(nir, cf_node); + } + + sweep_block(nir, impl->end_block); + + /* Wipe out all the metadata, if any. */ + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +sweep_function(nir_shader *nir, nir_function *f) +{ + ralloc_steal(nir, f); + + foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) { + ralloc_steal(nir, overload); + ralloc_steal(nir, overload->params); + if (overload->impl) + sweep_impl(nir, overload->impl); + } +} + +void +nir_sweep(nir_shader *nir) +{ + void *rubbish = ralloc_context(NULL); + + /* First, move ownership of all the memory to a temporary context; assume dead. */ + ralloc_adopt(rubbish, nir); + + /* Variables and registers are not dead. Steal them back. */ + steal_list(nir, nir_variable, &nir->uniforms); + steal_list(nir, nir_variable, &nir->inputs); + steal_list(nir, nir_variable, &nir->outputs); + steal_list(nir, nir_variable, &nir->globals); + steal_list(nir, nir_variable, &nir->system_values); + steal_list(nir, nir_register, &nir->registers); + + /* Recurse into functions, stealing their contents back. 
*/ + foreach_list_typed(nir_function, func, node, &nir->functions) { + sweep_function(nir, func); + } + + /* Free everything we didn't steal back. */ + ralloc_free(rubbish); +} From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): nir: Make nir_*_instr_create take a nir_shader instead of a void * context Message-ID: <20150407213538.E9E37762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 11694737fc3b2d7f31367dbbbb8f5c02b40a1773 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11694737fc3b2d7f31367dbbbb8f5c02b40a1773 Author: Jason Ekstrand Date: Tue Apr 7 12:33:17 2015 -0700 nir: Make nir_*_instr_create take a nir_shader instead of a void * context Signed-off-by: Jason Ekstrand Reviewed-by: Kenneth Graunke --- src/glsl/nir/nir.c | 36 ++++++++++++++++++------------------ src/glsl/nir/nir.h | 18 +++++++++--------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 1c6b603..c6e5361 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src) } nir_alu_instr * -nir_alu_instr_create(void *mem_ctx, nir_op op) +nir_alu_instr_create(nir_shader *shader, nir_op op) { unsigned num_srcs = nir_op_infos[op].num_inputs; nir_alu_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); instr_init(&instr->instr, nir_instr_type_alu); @@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op) } nir_jump_instr * -nir_jump_instr_create(void *mem_ctx, nir_jump_type type) +nir_jump_instr_create(nir_shader *shader, nir_jump_type type) { - nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr); + nir_jump_instr *instr = ralloc(shader, nir_jump_instr); instr_init(&instr->instr, nir_instr_type_jump); instr->type = type; return instr; } nir_load_const_instr * 
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components) +nir_load_const_instr_create(nir_shader *shader, unsigned num_components) { - nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr); + nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); instr_init(&instr->instr, nir_instr_type_load_const); nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); @@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components) } nir_intrinsic_instr * -nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) +nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) { unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; nir_intrinsic_instr *instr = - ralloc_size(mem_ctx, + ralloc_size(shader, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); instr_init(&instr->instr, nir_instr_type_intrinsic); @@ -438,9 +438,9 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op) } nir_call_instr * -nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) +nir_call_instr_create(nir_shader *shader, nir_function_overload *callee) { - nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr); + nir_call_instr *instr = ralloc(shader, nir_call_instr); instr_init(&instr->instr, nir_instr_type_call); instr->callee = callee; @@ -452,9 +452,9 @@ nir_call_instr_create(void *mem_ctx, nir_function_overload *callee) } nir_tex_instr * -nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) +nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) { - nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr); + nir_tex_instr *instr = ralloc(shader, nir_tex_instr); instr_init(&instr->instr, nir_instr_type_tex); dest_init(&instr->dest); @@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs) } nir_phi_instr * -nir_phi_instr_create(void *mem_ctx) +nir_phi_instr_create(nir_shader *shader) { - nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr); + nir_phi_instr *instr = 
ralloc(shader, nir_phi_instr); instr_init(&instr->instr, nir_instr_type_phi); dest_init(&instr->dest); @@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx) } nir_parallel_copy_instr * -nir_parallel_copy_instr_create(void *mem_ctx) +nir_parallel_copy_instr_create(nir_shader *shader) { - nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr); + nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); instr_init(&instr->instr, nir_instr_type_parallel_copy); exec_list_make_empty(&instr->entries); @@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx) } nir_ssa_undef_instr * -nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components) +nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) { - nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr); + nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); instr_init(&instr->instr, nir_instr_type_ssa_undef); nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 0f72301..f9ca0f7 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1480,26 +1480,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required); void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); /** creates an instruction with default swizzle/writemask/etc. 
with NULL registers */ -nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op); +nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); -nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type); +nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); -nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx, +nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, unsigned num_components); -nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx, +nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op); -nir_call_instr *nir_call_instr_create(void *mem_ctx, +nir_call_instr *nir_call_instr_create(nir_shader *shader, nir_function_overload *callee); -nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs); +nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); -nir_phi_instr *nir_phi_instr_create(void *mem_ctx); +nir_phi_instr *nir_phi_instr_create(nir_shader *shader); -nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx); +nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); -nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx, +nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components); nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); From kwg at kemper.freedesktop.org Tue Apr 7 21:35:38 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 7 Apr 2015 14:35:38 -0700 (PDT) Subject: Mesa (master): nir: Allocate dereferences out of their parent instruction or deref. 
Message-ID: <20150407213538.D7262762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: de2014cf1e12826a53a1132f6d80c889f375b2e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=de2014cf1e12826a53a1132f6d80c889f375b2e8 Author: Kenneth Graunke Date: Thu Apr 2 21:24:38 2015 -0700 nir: Allocate dereferences out of their parent instruction or deref. Jason pointed out that variable dereferences in NIR are really part of their parent instruction, and should have the same lifetime. Unlike in GLSL IR, they're not used very often - just for intrinsic variables, call parameters & return, and indirect samplers for texturing. Also, nir_deref_var is the top-level concept, and nir_deref_array/nir_deref_record are child nodes. This patch attempts to allocate nir_deref_vars out of their parent instruction, and any sub-dereferences out of their parent deref. It enforces these restrictions in the validator as well. This means that freeing an instruction should free its associated dereference chain as well. The memory sweeper pass can also happily ignore them. v2: Rename make_deref to evaluate_deref and make it take a nir_instr * instead of void *. This involves adding &instr->instr everywhere. (Requested by Jason Ekstrand.) 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/glsl_to_nir.cpp | 47 +++++++++++++++++++---------------- src/glsl/nir/nir.c | 6 ++--- src/glsl/nir/nir_lower_var_copies.c | 8 +++--- src/glsl/nir/nir_split_var_copies.c | 4 +-- src/glsl/nir/nir_validate.c | 13 ++++++---- src/mesa/program/prog_to_nir.c | 9 +++---- 6 files changed, 45 insertions(+), 42 deletions(-) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 80c5b3a..f6b8331 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -88,6 +88,8 @@ private: exec_list *cf_node_list; nir_instr *result; /* result of the expression tree last visited */ + nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + /* the head of the dereference chain we're creating */ nir_deref_var *deref_head; /* the tail of the dereference chain we're creating */ @@ -156,6 +158,14 @@ nir_visitor::~nir_visitor() _mesa_hash_table_destroy(this->overload_table, NULL); } +nir_deref_var * +nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) +{ + ir->accept(this); + ralloc_steal(mem_ctx, this->deref_head); + return this->deref_head; +} + static nir_constant * constant_copy(ir_constant *ir, void *mem_ctx) { @@ -582,13 +592,11 @@ void nir_visitor::visit(ir_return *ir) { if (ir->value != NULL) { - ir->value->accept(this); nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - copy->variables[0] = nir_deref_var_create(this->shader, - this->impl->return_var); - copy->variables[1] = this->deref_head; + copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); + copy->variables[1] = evaluate_deref(&copy->instr, ir->value); } nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); @@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); ir_dereference *param = (ir_dereference *)
ir->actual_parameters.get_head(); - param->accept(this); - instr->variables[0] = this->deref_head; + instr->variables[0] = evaluate_deref(&instr->instr, param); nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); @@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir) nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store_instr->num_components = 1; - ir->return_deref->accept(this); - store_instr->variables[0] = this->deref_head; + store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref); store_instr->src[0].is_ssa = true; store_instr->src[0].ssa = &instr->dest.ssa; @@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir) unsigned i = 0; foreach_in_list(ir_dereference, param, &ir->actual_parameters) { - param->accept(this); - instr->params[i] = this->deref_head; + instr->params[i] = evaluate_deref(&instr->instr, param); i++; } - ir->return_deref->accept(this); - instr->return_deref = this->deref_head; + instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); } @@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *copy = nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - ir->lhs->accept(this); - copy->variables[0] = this->deref_head; - - ir->rhs->accept(this); - copy->variables[1] = this->deref_head; - + copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs); + copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs); if (ir->condition) { nir_if *if_stmt = nir_if_create(this->shader); @@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir) load->num_components = ir->lhs->type->vector_elements; nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); load->variables[0] = lhs_deref; + ralloc_steal(load, load->variables[0]); nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr); nir_op vec_op; @@ -741,7 +742,7 @@
nir_visitor::visit(ir_assignment *ir) nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); store->num_components = ir->lhs->type->vector_elements; - nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref); + nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); store->variables[0] = nir_deref_as_var(store_deref); store->src[0] = src; @@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir) nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); load_instr->num_components = ir->type->vector_elements; load_instr->variables[0] = this->deref_head; + ralloc_steal(load_instr, load_instr->variables[0]); add_instr(&load_instr->instr, ir->type->vector_elements); } @@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir) nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); intrin->num_components = deref->type->vector_elements; intrin->variables[0] = this->deref_head; + ralloc_steal(intrin, intrin->variables[0]); if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || intrin->intrinsic == nir_intrinsic_interp_var_at_sample) @@ -1630,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir) unreachable("not reached"); } - ir->sampler->accept(this); - instr->sampler = this->deref_head; + instr->sampler = evaluate_deref(&instr->instr, ir->sampler); unsigned src_number = 0; @@ -1756,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir) int field_index = this->deref_tail->type->field_index(ir->field); assert(field_index >= 0); - nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index); + nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); deref->deref.type = ir->type; this->deref_tail->child = &deref->deref; this->deref_tail = &deref->deref; @@ -1780,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir) ir->array->accept(this); this->deref_tail->child = &deref->deref; + ralloc_steal(this->deref_tail, deref); 
this->deref_tail = &deref->deref; } diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 85ff0f4..1c6b603 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref) nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref) } ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } @@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); ret->deref.type = deref->deref.type; if (deref->deref.child) - ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child); + ret->deref.child = nir_copy_deref(ret, deref->deref.child); return ret; } diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c index 85ebb28..58389a7 100644 --- a/src/glsl/nir/nir_lower_var_copies.c +++ b/src/glsl/nir/nir_lower_var_copies.c @@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, unsigned num_components = glsl_get_vector_elements(src_tail->type); - nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref); - nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref); - nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); load->num_components = num_components; - load->variables[0] = nir_deref_as_var(src_deref); + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref)); nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); nir_instr_insert_before(&copy_instr->instr,
&load->instr); @@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); store->num_components = num_components; - store->variables[0] = nir_deref_as_var(dest_deref); + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref)); + store->src[0].is_ssa = true; store->src[0].ssa = &load->dest.ssa; diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c index 4d663b5..fc72c07 100644 --- a/src/glsl/nir/nir_split_var_copies.c +++ b/src/glsl/nir/nir_split_var_copies.c @@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy, * belongs to the copy instruction and b) the deref chains may * have some of the same links due to the way we constructed them */ - nir_deref *src = nir_copy_deref(state->mem_ctx, src_head); - nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head); + nir_deref *src = nir_copy_deref(new_copy, src_head); + nir_deref *dest = nir_copy_deref(new_copy, dest_head); new_copy->variables[0] = nir_deref_as_var(dest); new_copy->variables[1] = nir_deref_as_var(src); diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index e8c9d7b..a7aa798 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) static void validate_deref_chain(nir_deref *deref, validate_state *state) { + assert(deref->child == NULL || ralloc_parent(deref->child) == deref); + nir_deref *parent = NULL; while (deref != NULL) { switch (deref->deref_type) { @@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state) } static void -validate_deref_var(nir_deref_var *deref, validate_state *state) +validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) { assert(deref != NULL); + assert(ralloc_parent(deref) == parent_mem_ctx); assert(deref->deref.type == 
deref->var->type); validate_var_use(deref->var, state); @@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr->variables[i], state); + validate_deref_var(instr, instr->variables[i], state); } switch (instr->intrinsic) { @@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) } if (instr->sampler != NULL) - validate_deref_var(instr->sampler, state); + validate_deref_var(instr, instr->sampler, state); } static void @@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state) for (unsigned i = 0; i < instr->num_params; i++) { assert(instr->callee->params[i].type == instr->params[i]->deref.type); - validate_deref_var(instr->params[i], state); + validate_deref_var(instr, instr->params[i], state); } - validate_deref_var(instr->return_deref, state); + validate_deref_var(instr, instr->return_deref, state); } static void diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 5f00a8b..b298d07 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -153,8 +153,7 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); load->num_components = 4; - load->variables[0] = - nir_deref_var_create(b->shader, c->input_vars[prog_src->Index]); + load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -918,7 +917,7 @@ ptn_add_output_stores(struct ptn_compile *c) nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); store->num_components = 4; store->variables[0] = - nir_deref_var_create(b->shader, c->output_vars[var->data.location]); + 
nir_deref_var_create(store, c->output_vars[var->data.location]); store->src[0].reg.reg = c->output_regs[var->data.location]; nir_instr_insert_after_cf_list(c->build.cf_node_list, &store->instr); } @@ -962,7 +961,7 @@ setup_registers_and_variables(struct ptn_compile *c) nir_intrinsic_instr *load_x = nir_intrinsic_instr_create(shader, nir_intrinsic_load_var); load_x->num_components = 1; - load_x->variables[0] = nir_deref_var_create(shader, var); + load_x->variables[0] = nir_deref_var_create(load_x, var); nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL); nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr); @@ -978,7 +977,7 @@ setup_registers_and_variables(struct ptn_compile *c) nir_intrinsic_instr *store = nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); store->num_components = 4; - store->variables[0] = nir_deref_var_create(shader, fullvar); + store->variables[0] = nir_deref_var_create(store, fullvar); store->src[0] = nir_src_for_ssa(f001); nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); From airlied at kemper.freedesktop.org Tue Apr 7 22:20:10 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 7 Apr 2015 15:20:10 -0700 (PDT) Subject: Mesa (master): r600g/sb: Enable SB for geometry shaders Message-ID: <20150407222010.C0181762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2947807c8c7eae4b98eb37263b8a1d9ebbcafb5 Author: Glenn Kennard Date: Tue Apr 7 03:00:20 2015 +0200 r600g/sb: Enable SB for geometry shaders Add SV_GEOMETRY_EMIT special variable type to track the implicit dependencies between CUT/EMIT_VERTEX/MEM_RING instructions so GCM/scheduler doesn't reorder them. Mark emit instructions as unkillable so DCE doesn't eat them. Enable only for evergreen/cayman as there are a few unexplained GS piglit regressions on R6xx/R7xx with SB enabled otherwise. 
Signed-off-by: Glenn Kennard Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_isa.h | 8 ++++---- src/gallium/drivers/r600/r600_shader.c | 12 ++++++++---- src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 2 +- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 2 +- src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 25 ++++++++++++++++++++++++ src/gallium/drivers/r600/sb/sb_core.cpp | 5 ++++- src/gallium/drivers/r600/sb/sb_dump.cpp | 4 +++- src/gallium/drivers/r600/sb/sb_ir.h | 6 +++++- src/gallium/drivers/r600/sb/sb_ra_init.cpp | 4 ++-- src/gallium/drivers/r600/sb/sb_sched.cpp | 2 +- src/gallium/drivers/r600/sb/sb_valtable.cpp | 1 + 11 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h index ec3f702..381f06d 100644 --- a/src/gallium/drivers/r600/r600_isa.h +++ b/src/gallium/drivers/r600/r600_isa.h @@ -641,7 +641,7 @@ static const struct cf_op_info cf_op_table[] = { {"MEM_SCRATCH", { 0x24, 0x24, 0x50, 0x50 }, CF_MEM }, {"MEM_REDUCT", { 0x25, 0x25, -1, -1 }, CF_MEM }, - {"MEM_RING", { 0x26, 0x26, 0x52, 0x52 }, CF_MEM }, + {"MEM_RING", { 0x26, 0x26, 0x52, 0x52 }, CF_MEM | CF_EMIT }, {"EXPORT", { 0x27, 0x27, 0x53, 0x53 }, CF_EXP }, {"EXPORT_DONE", { 0x28, 0x28, 0x54, 0x54 }, CF_EXP }, @@ -649,9 +649,9 @@ static const struct cf_op_info cf_op_table[] = { {"MEM_EXPORT", { -1, 0x3A, 0x55, 0x55 }, CF_MEM }, {"MEM_RAT", { -1, -1, 0x56, 0x56 }, CF_MEM | CF_RAT }, {"MEM_RAT_NOCACHE", { -1, -1, 0x57, 0x57 }, CF_MEM | CF_RAT }, - {"MEM_RING1", { -1, -1, 0x58, 0x58 }, CF_MEM }, - {"MEM_RING2", { -1, -1, 0x59, 0x59 }, CF_MEM }, - {"MEM_RING3", { -1, -1, 0x5A, 0x5A }, CF_MEM }, + {"MEM_RING1", { -1, -1, 0x58, 0x58 }, CF_MEM | CF_EMIT }, + {"MEM_RING2", { -1, -1, 0x59, 0x59 }, CF_MEM | CF_EMIT }, + {"MEM_RING3", { -1, -1, 0x5A, 0x5A }, CF_MEM | CF_EMIT }, {"MEM_MEM_COMBINED", { -1, -1, 0x5B, 0x5B }, CF_MEM }, {"MEM_RAT_COMBINED_NOCACHE", { -1, -1, 0x5C, 0x5C }, CF_MEM | 
CF_RAT }, {"MEM_RAT_COMBINED", { -1, -1, -1, 0x5D }, CF_MEM | CF_RAT }, /* ??? not in cayman isa doc */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ec75400..87b6e6e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -159,8 +159,10 @@ int r600_pipe_shader_create(struct pipe_context *ctx, goto error; } - /* disable SB for geom shaders - it can't handle the CF_EMIT instructions */ - use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY); + /* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */ + if (rctx->b.chip_class <= R700) { + use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY); + } /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */ use_sb &= !shader->shader.uses_index_registers; @@ -1141,6 +1143,8 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi for (i = 0; i < 3; i++) { treg[i] = r600_get_temp(ctx); } + r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); + t2 = r600_get_temp(ctx); for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); @@ -1935,9 +1939,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3; } + shader->max_arrays = 0; + shader->num_arrays = 0; if (indirect_gprs) { - shader->max_arrays = 0; - shader->num_arrays = 0; if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp index 6f6a57e..5232782 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp @@ -448,7 +448,7 @@ void bc_dump::dump(fetch_node& n) { s << " FWQ"; if (ctx.is_egcm() && n.bc.resource_index_mode) s << " RIM:SQ_CF_INDEX_" 
<< n.bc.resource_index_mode; - if (ctx.is_egcm() && n.bc.resource_index_mode) + if (ctx.is_egcm() && n.bc.sampler_index_mode) s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode; s << " UCF:" << n.bc.use_const_fields diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 08b7d77..8c2cd14 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -290,7 +290,7 @@ void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) { value *d = n->dst.empty() ? NULL : n->dst[0]; if (d && d->is_special_reg()) { - assert(n->bc.op_ptr->flags & AF_MOVA); + assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit()); d = NULL; } diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 08e7f5c..4879c03 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -135,6 +135,16 @@ int bc_parser::parse_decls() { } } + // GS inputs can add indirect addressing + if (sh->target == TARGET_GS) { + if (pshader->num_arrays) { + for (unsigned i = 0; i < pshader->num_arrays; ++i) { + r600_shader_array &a = pshader->arrays[i]; + sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask); + } + } + } + if (sh->target == TARGET_VS || sh->target == TARGET_ES) sh->add_input(0, 1, 0x0F); else if (sh->target == TARGET_GS) { @@ -720,6 +730,16 @@ int bc_parser::prepare_ir() { c->flags |= NF_DONT_HOIST | NF_DONT_MOVE; } + if (flags & CF_EMIT) { + // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX + c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); + c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); + if (sh->target == TARGET_ES) { + // For ES shaders this is an export + c->flags |= NF_DONT_KILL; + } + } + if (!burst_count--) break; @@ -736,6 +756,11 @@ int bc_parser::prepare_ir() { c->bc.end_of_program = eop; + } else if (flags & CF_EMIT) 
{ + c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; + + c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); + c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); } } diff --git a/src/gallium/drivers/r600/sb/sb_core.cpp b/src/gallium/drivers/r600/sb/sb_core.cpp index 7db8008..afea818 100644 --- a/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/src/gallium/drivers/r600/sb/sb_core.cpp @@ -189,7 +189,10 @@ int r600_sb_bytecode_process(struct r600_context *rctx, sh->set_undef(sh->root->live_before); - SB_RUN_PASS(if_conversion, 1); + // if conversion breaks the dependency tracking between CF_EMIT ops when it removes + // the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS + if (sh->target != TARGET_GS) + SB_RUN_PASS(if_conversion, 1); // if_conversion breaks info about uses, but next pass (peephole) // doesn't need it, so we can skip def/use update here diff --git a/src/gallium/drivers/r600/sb/sb_dump.cpp b/src/gallium/drivers/r600/sb/sb_dump.cpp index b2130a4..d605170 100644 --- a/src/gallium/drivers/r600/sb/sb_dump.cpp +++ b/src/gallium/drivers/r600/sb/sb_dump.cpp @@ -354,7 +354,9 @@ void dump::dump_op(node &n, const char *name) { "WRITE_IND_ACK"}; sblog << " " << exp_type[c->bc.type] << " " << c->bc.array_base << " ES:" << c->bc.elem_size; - has_dst = false; + if (!(c->bc.op_ptr->flags & CF_EMIT)) { + has_dst = false; + } } } diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h index 711c2eb..560a4a9 100644 --- a/src/gallium/drivers/r600/sb/sb_ir.h +++ b/src/gallium/drivers/r600/sb/sb_ir.h @@ -41,7 +41,8 @@ enum special_regs { SV_ALU_PRED = 128, SV_EXEC_MASK, SV_AR_INDEX, - SV_VALID_MASK + SV_VALID_MASK, + SV_GEOMETRY_EMIT }; class node; @@ -506,6 +507,9 @@ public: bool is_AR() { return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0); } + bool is_geometry_emit() { + return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0); + } node* any_def() { assert(!(def && adef)); diff --git 
a/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/src/gallium/drivers/r600/sb/sb_ra_init.cpp index e53aba5..95b9290 100644 --- a/src/gallium/drivers/r600/sb/sb_ra_init.cpp +++ b/src/gallium/drivers/r600/sb/sb_ra_init.cpp @@ -707,7 +707,7 @@ void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) { assert(!o->is_dead()); - if (o->is_undef()) + if (o->is_undef() || o->is_geometry_emit()) continue; if (allow_swz && o->is_float_0_or_1()) @@ -751,7 +751,7 @@ void ra_split::split_vector_inst(node* n) { // src vectors 1 (src[4-7] and 2 (src[8-11]) unsigned nvec = n->src.size() >> 2; - assert(nvec << 2 == n->src.size()); + assert(nvec << 2 <= n->src.size()); for (unsigned nv = 0; nv < nvec; ++nv) { vvec sv, tv, nsrc(4); diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index 63e7464..4248a3f 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -1463,7 +1463,7 @@ unsigned post_scheduler::try_add_instruction(node *n) { value *d = a->dst.empty() ? 
NULL : a->dst[0]; if (d && d->is_special_reg()) { - assert(a->bc.op_ptr->flags & AF_MOVA); + assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit()); d = NULL; } diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp index 0d39e9c..eb242b1 100644 --- a/src/gallium/drivers/r600/sb/sb_valtable.cpp +++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp @@ -55,6 +55,7 @@ sb_ostream& operator << (sb_ostream &o, value &v) { case SV_ALU_PRED: o << "PR"; break; case SV_EXEC_MASK: o << "EM"; break; case SV_VALID_MASK: o << "VM"; break; + case SV_GEOMETRY_EMIT: o << "GEOMETRY_EMIT"; break; default: o << "???specialreg"; break; } break; From airlied at kemper.freedesktop.org Tue Apr 7 22:20:10 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 7 Apr 2015 15:20:10 -0700 (PDT) Subject: Mesa (master): u_tile: fix stencil texturing tests under softpipe Message-ID: <20150407222010.9F1B2762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 61393bdcdc3b63624bf6e9730444f5e9deeedfc8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61393bdcdc3b63624bf6e9730444f5e9deeedfc8 Author: Dave Airlie Date: Tue Apr 7 09:52:41 2015 +1000 u_tile: fix stencil texturing tests under softpipe arb_stencil_texturing-draw failed under softpipe because we got a float back from the texturing function, and then tried to U2F it, stencil texturing returns ints, so we should fix the tiling to retrieve the stencil values as integers not floats. 
Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_tile.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 6252e5d..f5edb8b 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -214,13 +214,13 @@ s8x24_get_tile_rgba(const unsigned *src, unsigned i, j; for (i = 0; i < h; i++) { - float *pRow = p; + uint32_t *pRow = p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = pRow[1] = pRow[2] = - pRow[3] = (float)((*src++ >> 24) & 0xff); + pRow[3] = ((*src++ >> 24) & 0xff); } p += dst_stride; @@ -241,12 +241,12 @@ x24s8_get_tile_rgba(const unsigned *src, unsigned i, j; for (i = 0; i < h; i++) { - float *pRow = p; + uint32_t *pRow = p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = pRow[1] = pRow[2] = - pRow[3] = (float)(*src++ & 0xff); + pRow[3] = (*src++ & 0xff); } p += dst_stride; } @@ -265,12 +265,12 @@ s8_get_tile_rgba(const unsigned char *src, unsigned i, j; for (i = 0; i < h; i++) { - float *pRow = p; + uint32_t *pRow = p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = pRow[1] = pRow[2] = - pRow[3] = (float)(*src++ & 0xff); + pRow[3] = (*src++ & 0xff); } p += dst_stride; } From airlied at kemper.freedesktop.org Tue Apr 7 22:20:10 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 7 Apr 2015 15:20:10 -0700 (PDT) Subject: Mesa (master): r600g/sb: Update last_cf for loops Message-ID: <20150407222010.A9B7C7633A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 06bb68da4a58403e678b51511e40a7f752dfc046 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=06bb68da4a58403e678b51511e40a7f752dfc046 Author: Glenn Kennard Date: Thu Mar 26 02:56:50 2015 +0100 r600g/sb: Update last_cf for loops CF_END could end up emitted in the middle of a shader on cayman when there was a loop at the very end. Fixes glsl-1.50-geometry-end-primitive and ext_transform_feedback-geometry-shaders-basic piglit tests. 
Signed-off-by: Glenn Kennard Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 8d0be06..08b7d77 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -127,6 +127,14 @@ void bc_finalizer::finalize_loop(region_node* r) { cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); + // Update last_cf, but don't overwrite it if it's outside the current loop nest since + // it may point to a cf that is later in program order. + // The single parent level check is sufficient since finalize_loop() is processed in + // reverse order from innermost to outermost loop nest level. + if (!last_cf || last_cf->get_parent_region() == r) { + last_cf = loop_end; + } + loop_start->jump_after(loop_end); loop_end->jump_after(loop_start); From airlied at kemper.freedesktop.org Wed Apr 8 00:31:54 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 7 Apr 2015 17:31:54 -0700 (PDT) Subject: Mesa (master): u_tile: fix warnings about incompatible casts. Message-ID: <20150408003154.B94E2762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6b722c390b484485b3be60057782ee19583a82d1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b722c390b484485b3be60057782ee19583a82d1 Author: Dave Airlie Date: Wed Apr 8 10:31:14 2015 +1000 u_tile: fix warnings about incompatible casts. 
Signed-off-by: Dave Airlie --- src/gallium/auxiliary/util/u_tile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index f5edb8b..8e19920 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -214,7 +214,7 @@ s8x24_get_tile_rgba(const unsigned *src, unsigned i, j; for (i = 0; i < h; i++) { - uint32_t *pRow = p; + uint32_t *pRow = (uint32_t *)p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = @@ -241,7 +241,7 @@ x24s8_get_tile_rgba(const unsigned *src, unsigned i, j; for (i = 0; i < h; i++) { - uint32_t *pRow = p; + uint32_t *pRow = (uint32_t *)p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = pRow[1] = @@ -265,7 +265,7 @@ s8_get_tile_rgba(const unsigned char *src, unsigned i, j; for (i = 0; i < h; i++) { - uint32_t *pRow = p; + uint32_t *pRow = (uint32_t *)p; for (j = 0; j < w; j++, pRow += 4) { pRow[0] = pRow[1] = From zhen at kemper.freedesktop.org Wed Apr 8 05:24:49 2015 From: zhen at kemper.freedesktop.org (Zhenyu Wang) Date: Tue, 7 Apr 2015 22:24:49 -0700 (PDT) Subject: Mesa (master): i965: Fix depth field setting in surface state for raw buffer on Gen7/8 Message-ID: <20150408052450.034CA762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: eb51c6d55ff8b91497bd81f48f95e6bbe863a3e0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb51c6d55ff8b91497bd81f48f95e6bbe863a3e0 Author: Zhenyu Wang Date: Tue Apr 7 13:48:38 2015 +0800 i965: Fix depth field setting in surface state for raw buffer on Gen7/8 On Gen7/8 for RAW surface format, the depth field (surf[3]) in surface state means [30:21] bits of number of entries which is different from other surface format which uses [26:21] bits field. 
Signed-off-by: Zhenyu Wang Reviewed-by: Kristian Høgsberg Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 7 +++++-- src/mesa/drivers/dri/i965/gen8_surface_state.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index d9361d3..18bcb8a 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -238,8 +238,11 @@ gen7_emit_buffer_surface_state(struct brw_context *brw, surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */ surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) | SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT); - surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH) | - (pitch - 1); + if (surface_format == BRW_SURFACEFORMAT_RAW) + surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3ff, BRW_SURFACE_DEPTH); + else + surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH); + surf[3] |= (pitch - 1); surf[5] = SET_FIELD(GEN7_MOCS_L3, GEN7_SURFACE_MOCS); diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 0007c95..ba59b05 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -129,8 +129,11 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, surf[2] = SET_FIELD((buffer_size - 1) & 0x7f, GEN7_SURFACE_WIDTH) | SET_FIELD(((buffer_size - 1) >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT); - surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH) | - (pitch - 1); + if (surface_format == BRW_SURFACEFORMAT_RAW) + surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3ff, BRW_SURFACE_DEPTH); + else + surf[3] = SET_FIELD(((buffer_size - 1) >> 21) & 0x3f, BRW_SURFACE_DEPTH); + surf[3] |= (pitch - 1); surf[7] = SET_FIELD(HSW_SCS_RED, 
GEN7_SURFACE_SCS_R) | SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | From nroberts at kemper.freedesktop.org Wed Apr 8 11:13:28 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Wed, 8 Apr 2015 04:13:28 -0700 (PDT) Subject: Mesa (master): i965/skl: Fix the order of the arguments for the LD sampler message Message-ID: <20150408111328.8284C762DA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4deca1274c25b80351dbec972b68ab6520a89b31 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4deca1274c25b80351dbec972b68ab6520a89b31 Author: Neil Roberts Date: Fri Mar 6 19:11:19 2015 +0000 i965/skl: Fix the order of the arguments for the LD sampler message In Skylake the order of the arguments for sample messages with the LD type are u, v, lod, r whereas previously they were u, lod, v, r. This fixes 144 Piglit tests including ones that directly use texelFetch and also some using the meta stencil blit path which appears to use texelFetch in its shader. v2: Fix sampling 1D textures Reviewed-by: Kenneth Graunke Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3622e65..06337c9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1889,15 +1889,26 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, length++; break; case ir_txf: - /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ + /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. 
+ * On Gen9 they are u, v, lod, r + */ + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); coordinate = offset(coordinate, 1); length++; + if (brw->gen >= 9) { + if (coord_components >= 2) { + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); + coordinate = offset(coordinate, 1); + } + length++; + } + emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod)); length++; - for (int i = 1; i < coord_components; i++) { + for (int i = brw->gen >= 9 ? 2 : 1; i < coord_components; i++) { emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate)); coordinate = offset(coordinate, 1); length++; From brianp at kemper.freedesktop.org Wed Apr 8 23:03:29 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 8 Apr 2015 16:03:29 -0700 (PDT) Subject: Mesa (master): glsl: check for forced_language_version in is_version() Message-ID: <20150408230329.B1525761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 09e7e2016b702e2c4b79a2c01e8abc1365b4c422 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09e7e2016b702e2c4b79a2c01e8abc1365b4c422 Author: Brian Paul Date: Wed Apr 1 14:36:09 2015 -0600 glsl: check for forced_language_version in is_version() This is a follow-on fix from the earlier "glsl: allow ForceGLSLVersion to override #version directives" change. Since we're not changing the language_version field, we have to check forced_language_version here. Reviewed-by: Ian Romanick --- src/glsl/glsl_parser_extras.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 1f5478b..dae7864 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -105,8 +105,10 @@ struct _mesa_glsl_parse_state { { unsigned required_version = this->es_shader ? required_glsl_es_version : required_glsl_version; + unsigned this_version = this->forced_language_version + ? 
this->forced_language_version : this->language_version; return required_version != 0 - && this->language_version >= required_version; + && this_version >= required_version; } bool check_version(unsigned required_glsl_version, From sroland at kemper.freedesktop.org Thu Apr 9 00:36:52 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Wed, 8 Apr 2015 17:36:52 -0700 (PDT) Subject: Mesa (master): draw: (trivial) don't print the shader twice with GALLIVM_DEBUG=tgsi (or ir) Message-ID: <20150409003652.E975D7633A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a873b79fa5e3138196a3c1785f2a65308fa78286 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a873b79fa5e3138196a3c1785f2a65308fa78286 Author: Roland Scheidegger Date: Sat Apr 4 16:49:08 2015 +0200 draw: (trivial) don't print the shader twice with GALLIVM_DEBUG=tgsi (or ir) Neither the shader nor the key change when doing elts or linear variant, so this was just annoying (probably mildly useful at some point when we printed the IR per function too). 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_llvm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 1e6e699..7150611 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -562,6 +562,11 @@ draw_llvm_create_variant(struct draw_llvm *llvm, memcpy(&variant->key, key, shader->variant_key_size); + if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { + tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0); + draw_llvm_dump_variant_key(&variant->key); + } + vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs); variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); @@ -606,11 +611,6 @@ generate_vs(struct draw_llvm_variant *variant, LLVMValueRef num_consts_ptr = draw_jit_context_num_vs_constants(variant->gallivm, context_ptr); - if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { - tgsi_dump(tokens, 0); - draw_llvm_dump_variant_key(&variant->key); - } - lp_build_tgsi_soa(variant->gallivm, tokens, vs_type, From sroland at kemper.freedesktop.org Thu Apr 9 00:36:52 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Wed, 8 Apr 2015 17:36:52 -0700 (PDT) Subject: Mesa (master): gallivm: don't use control flow when doing indirect constant buffer lookups Message-ID: <20150409003652.DFA48761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 586536a4e1c34725b3b38c3425db569fac0c91e9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=586536a4e1c34725b3b38c3425db569fac0c91e9 Author: Roland Scheidegger Date: Thu Apr 9 00:49:11 2015 +0200 gallivm: don't use control flow when doing indirect constant buffer lookups llvm goes crazy when doing that, using way more memory and time, though there's probably more to it - this points to a very much similar issue as fixed in 8a9f5ecdb116d0449d63f7b94efbfa8b205d826f. 
In any case I've seen a quite plain looking vertex shader with just ~50 simple tgsi instructions (but with a dozen or so such indirect constant buffer lookups) go from a terribly high ~440ms compile time (consuming 25MB of memory in the process) down to a still awful ~230ms and 13MB with this fix (with llvm 3.3), so there's still obvious improvements possible (but I have no clue why it's so slow...). The resulting shader is most likely also faster (certainly seemed so though I don't have any hard numbers as it may have been influenced by compile times) since generally fetching constants outside the buffer range is most likely an app error (that is we expect all indices to be valid). It is possible this fixes some mysterious vertex shader slowdowns we've seen ever since we are conforming to newer apis at least partially (the main draw loop also has similar looking conditionals which we probably could do without - if not for the fetch at least for the additional elts condition.) v2: use static vars for the fake bufs, minor code cleanups Reviewed-by: Jose Fonseca --- .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 28 +++--- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 93 ++++++++------------ src/gallium/drivers/llvmpipe/lp_setup.c | 6 +- 3 files changed, 58 insertions(+), 69 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 0dfafdc..d17d695 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -271,30 +271,38 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, static void llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) { + static const float fake_const_buf[4]; struct llvm_middle_end *fpme = llvm_middle_end(middle); struct draw_context *draw = fpme->draw; + struct draw_llvm *llvm = fpme->llvm; unsigned i; - for (i = 0; i < 
Elements(fpme->llvm->jit_context.vs_constants); ++i) { + for (i = 0; i < Elements(llvm->jit_context.vs_constants); ++i) { int num_consts = draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4); - fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; - fpme->llvm->jit_context.num_vs_constants[i] = num_consts; + llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; + llvm->jit_context.num_vs_constants[i] = num_consts; + if (num_consts == 0) { + llvm->jit_context.vs_constants[i] = fake_const_buf; + } } - for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) { + for (i = 0; i < Elements(llvm->gs_jit_context.constants); ++i) { int num_consts = draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4); - fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; - fpme->llvm->gs_jit_context.num_constants[i] = num_consts; + llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; + llvm->gs_jit_context.num_constants[i] = num_consts; + if (num_consts == 0) { + llvm->gs_jit_context.constants[i] = fake_const_buf; + } } - fpme->llvm->jit_context.planes = + llvm->jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; - fpme->llvm->gs_jit_context.planes = + llvm->gs_jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; - fpme->llvm->jit_context.viewports = draw->viewports; - fpme->llvm->gs_jit_context.viewports = draw->viewports; + llvm->jit_context.viewports = draw->viewports; + llvm->gs_jit_context.viewports = draw->viewports; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 17b68ff..448c99d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -944,20 +944,38 @@ gather_outputs(struct lp_build_tgsi_soa_context * bld) * with a little work. 
*/ static LLVMValueRef -build_gather(struct lp_build_context *bld, +build_gather(struct lp_build_tgsi_context *bld_base, LLVMValueRef base_ptr, LLVMValueRef indexes, - LLVMValueRef *overflow_mask) + LLVMValueRef overflow_mask) { - LLVMBuilderRef builder = bld->gallivm->builder; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *bld = &bld_base->base; LLVMValueRef res = bld->undef; unsigned i; - LLVMValueRef temp_ptr = NULL; + + /* + * overflow_mask is a vector telling us which channels + * in the vector overflowed. We use the overflow behavior for + * constant buffers which is defined as: + * Out of bounds access to constant buffer returns 0 in all + * components. Out of bounds behavior is always with respect + * to the size of the buffer bound at that slot. + */ if (overflow_mask) { - temp_ptr = lp_build_alloca( - bld->gallivm, - lp_build_vec_type(bld->gallivm, bld->type), ""); + /* + * We avoid per-element control flow here (also due to llvm going crazy, + * though I suspect it's better anyway since overflow is likely rare). + * Note that since we still fetch from buffers even if num_elements was + * zero (in this case we'll fetch from index zero) the jit func callers + * MUST provide valid fake constant buffers of size 4x32 (the values do + * not matter), otherwise we'd still need (not per element though) + * control flow. + */ + indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes); } /* @@ -968,53 +986,16 @@ build_gather(struct lp_build_context *bld, LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); LLVMValueRef scalar_ptr, scalar; - LLVMValueRef overflow; - struct lp_build_if_state if_ctx; - - /* - * overflow_mask is a boolean vector telling us which channels - * in the vector overflowed. 
We use the overflow behavior for - * constant buffers which is defined as: - * Out of bounds access to constant buffer returns 0 in all - * componenets. Out of bounds behavior is always with respect - * to the size of the buffer bound at that slot. - */ - if (overflow_mask) { - overflow = LLVMBuildExtractElement(builder, *overflow_mask, - ii, ""); - lp_build_if(&if_ctx, bld->gallivm, overflow); - { - LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, ""); - val = LLVMBuildInsertElement( - builder, val, - LLVMConstNull(LLVMFloatTypeInContext(bld->gallivm->context)), - ii, ""); - LLVMBuildStore(builder, val, temp_ptr); - } - lp_build_else(&if_ctx); - { - LLVMValueRef val = LLVMBuildLoad(builder, temp_ptr, ""); - - scalar_ptr = LLVMBuildGEP(builder, base_ptr, - &index, 1, "gather_ptr"); - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - - val = LLVMBuildInsertElement(builder, val, scalar, ii, ""); - LLVMBuildStore(builder, val, temp_ptr); - } - lp_build_endif(&if_ctx); - } else { - scalar_ptr = LLVMBuildGEP(builder, base_ptr, - &index, 1, "gather_ptr"); - scalar = LLVMBuildLoad(builder, scalar_ptr, ""); + scalar_ptr = LLVMBuildGEP(builder, base_ptr, + &index, 1, "gather_ptr"); + scalar = LLVMBuildLoad(builder, scalar_ptr, ""); - res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); - } + res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); } if (overflow_mask) { - res = LLVMBuildLoad(builder, temp_ptr, "gather_val"); + res = lp_build_select(bld, overflow_mask, bld->zero, res); } return res; @@ -1247,17 +1228,15 @@ emit_fetch_constant( num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); /* Construct a boolean vector telling us which channels * overflow the bound constant buffer */ - overflow_mask = LLVMBuildICmp(builder, LLVMIntUGE, - indirect_index, - num_consts, ""); + overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, + indirect_index, num_consts); /* index_vec = indirect_index * 4 + swizzle */ index_vec = 
lp_build_shl_imm(uint_bld, indirect_index, 2); index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); /* Gather values from the constant buffer */ - res = build_gather(&bld_base->base, consts_ptr, index_vec, - &overflow_mask); + res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask); } else { LLVMValueRef index; /* index into the const buffer */ @@ -1319,7 +1298,7 @@ emit_fetch_immediate( FALSE); /* Gather values from the immediate register array */ - res = build_gather(&bld_base->base, imms_array, index_vec, NULL); + res = build_gather(bld_base, imms_array, index_vec, NULL); } else { LLVMValueRef lindex = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); @@ -1373,7 +1352,7 @@ emit_fetch_input( inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, ""); /* Gather values from the input register array */ - res = build_gather(&bld_base->base, inputs_array, index_vec, NULL); + res = build_gather(bld_base, inputs_array, index_vec, NULL); } else { if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { LLVMValueRef lindex = lp_build_const_int32(gallivm, @@ -1495,7 +1474,7 @@ emit_fetch_temporary( temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); /* Gather values from the temporary register array */ - res = build_gather(&bld_base->base, temps_array, index_vec, NULL); + res = build_gather(bld_base, temps_array, index_vec, NULL); } else { LLVMValueRef temp_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 3b0056c..96cc77c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -999,6 +999,7 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, static boolean try_update_scene_state( struct lp_setup_context *setup ) { + static const float fake_const_buf[4]; boolean new_scene = (setup->fs.stored == NULL); struct lp_scene *scene = setup->scene; unsigned i; @@ -1103,14 +1104,15 @@ 
try_update_scene_state( struct lp_setup_context *setup ) setup->constants[i].stored_size = current_size; setup->constants[i].stored_data = stored; } + setup->fs.current.jit_context.constants[i] = + setup->constants[i].stored_data; } else { setup->constants[i].stored_size = 0; setup->constants[i].stored_data = NULL; + setup->fs.current.jit_context.constants[i] = fake_const_buf; } - setup->fs.current.jit_context.constants[i] = - setup->constants[i].stored_data; num_constants = setup->constants[i].stored_size / (sizeof(float) * 4); setup->fs.current.jit_context.num_constants[i] = num_constants; From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:22 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:22 -0700 (PDT) Subject: Mesa (10.5): st_glsl_to_tgsi: only do mov copy propagation on temps (v2) Message-ID: <20150409112722.E7C24761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 712466fb53b5f5086159a8bb883adfdf169f4f00 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=712466fb53b5f5086159a8bb883adfdf169f4f00 Author: Dave Airlie Date: Thu Mar 26 09:17:39 2015 +1000 st_glsl_to_tgsi: only do mov copy propagation on temps (v2) Don't propagate ARRAYs This should fix: https://bugs.freedesktop.org/show_bug.cgi?id=89759 v2: just specify arrays so we get input propagation Signed-off-by: Dave Airlie Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Ilia Mirkin (cherry picked from commit 91e3533481d6921c4b46109742d6f67b7f897f86) --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index a9ea8c8..7abb85a 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3668,6 +3668,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) inst->dst.index == inst->src[0].index) && !inst->dst.reladdr && !inst->saturate && + inst->src[0].file != PROGRAM_ARRAY && 
!inst->src[0].reladdr && !inst->src[0].reladdr2 && !inst->src[0].negate) { From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:22 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:22 -0700 (PDT) Subject: Mesa (10.5): st/mesa: update arrays when the current attrib has been updated Message-ID: <20150409112722.EEABE7633A@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 407365e375ca7ba78d95c2e3f189a837201bcc22 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=407365e375ca7ba78d95c2e3f189a837201bcc22 Author: Ilia Mirkin Date: Thu Mar 26 15:14:22 2015 -0400 st/mesa: update arrays when the current attrib has been updated Fixes the recently-sent gl-2.0-vertex-const-attr piglit test. Makes sure to revalidate arrays when only the current attribute has been updated via glVertexAttrib*. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89754 Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák Cc: "10.4 10.5" (cherry picked from commit 9d1b5febb62d74c9fc564635d4e0fa5207928c46) --- src/mesa/state_tracker/st_atom_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index 9b52f97..d4fb8b8 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -598,7 +598,7 @@ static void update_array(struct st_context *st) const struct st_tracked_state st_update_array = { "st_update_array", /* name */ { /* dirty */ - 0, /* mesa */ + _NEW_CURRENT_ATTRIB, /* mesa */ ST_NEW_VERTEX_ARRAYS | ST_NEW_VERTEX_PROGRAM, /* st */ }, update_array /* update */ From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): nv50/ir/gk110: fix offset flag position for TXD opcode Message-ID: <20150409112723.1251C761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 
ac46cf31fc897cad098901686cc2caf5921a39cb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ac46cf31fc897cad098901686cc2caf5921a39cb Author: Ilia Mirkin Date: Fri Mar 27 18:38:24 2015 -0400 nv50/ir/gk110: fix offset flag position for TXD opcode Cc: "10.4 10.5" Signed-off-by: Ilia Mirkin (cherry picked from commit 58030a8f99d94d6c1bab02ef113d93c6c2636216) --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index d8adc93..a8c2619 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1116,6 +1116,7 @@ CodeEmitterGK110::emitTEX(const TexInstruction *i) if (i->tex.useOffsets == 1) { switch (i->op) { case OP_TXF: code[1] |= 0x200; break; + case OP_TXD: code[1] |= 0x00400000; break; default: code[1] |= 0x800; break; } } From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): freedreno/a3xx: fix 3d texture layout Message-ID: <20150409112723.1C8AE761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 61fc1295af5bbde3abe755fb263c1827c58688ae URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61fc1295af5bbde3abe755fb263c1827c58688ae Author: Ilia Mirkin Date: Sun Mar 15 16:38:42 2015 -0400 freedreno/a3xx: fix 3d texture layout The SZ2 field contains the layer size of a lower miplevel. It only contains 4 bits, which limits the maximum layer size it can describe. In situations where the next miplevel would be too big, the hardware appears to keep minifying the size until it hits one of that size. Unfortunately the hardware's ideas about sizes can differ from freedreno's which can still lead to issues. Minimize those by stopping to minify as soon as possible. 
Signed-off-by: Ilia Mirkin Cc: "10.4 10.5" (cherry picked from commit 738c8319ac85b175994b35d1fdc4860e18184b93) --- src/gallium/drivers/freedreno/a3xx/fd3_texture.c | 7 +++++-- src/gallium/drivers/freedreno/freedreno_resource.c | 16 +++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c index 567f6c7..59b3a93 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -212,6 +212,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, struct fd_resource *rsc = fd_resource(prsc); unsigned lvl = cso->u.tex.first_level; unsigned miplevels = cso->u.tex.last_level - lvl; + uint32_t sz2 = 0; if (!so) return NULL; @@ -252,8 +253,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, case PIPE_TEXTURE_3D: so->texconst3 = A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | - A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0) | - A3XX_TEX_CONST_3_LAYERSZ2(rsc->slices[0].size0); + A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[lvl].size0); + while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) + sz2 = rsc->slices[++lvl].size0; + so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(sz2); break; default: so->texconst3 = 0x00000000; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 69e5452..efafb89 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -215,14 +215,20 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment) slice->pitch = width = align(width, 32); slice->offset = size; - /* 1d array, 2d array, 3d textures (but not cube!) must all have the - * same layer size for each miplevel on a3xx. These are also the - * targets that have non-1 alignment. 
+ /* 1d array and 2d array textures must all have the same layer size + * for each miplevel on a3xx. 3d textures can have different layer + * sizes for high levels, but the hw auto-sizer is buggy (or at least + * different than what this code does), so as soon as the layer size + * range gets into range, we stop reducing it. */ - if (level == 0 || layers_in_level == 1 || alignment == 1) + if (prsc->target == PIPE_TEXTURE_3D && ( + level == 1 || + (level > 1 && rsc->slices[level - 1].size0 > 0xf000))) + slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); + else if (level == 0 || rsc->layer_first || alignment == 1) slice->size0 = align(slice->pitch * height * rsc->cpp, alignment); else - slice->size0 = rsc->slices[0].size0; + slice->size0 = rsc->slices[level - 1].size0; size += slice->size0 * depth * layers_in_level; From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): radeonsi: Cache LLVMTargetMachineRef in context instead of in screen Message-ID: <20150409112723.361AB761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 64d0f0e3b24c7d3ffd7bde921aadfccf3f12df0c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64d0f0e3b24c7d3ffd7bde921aadfccf3f12df0c Author: Michel Dänzer Date: Thu Mar 26 11:32:59 2015 +0900 radeonsi: Cache LLVMTargetMachineRef in context instead of in screen Fixes a crash in genymotion with several threads compiling shaders concurrently. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89746 Cc: 10.5 Reviewed-by: Tom Stellard (cherry picked from commit d64adc3a79e419062432cfa8d1cbc437676a3fbd) Conflicts: src/gallium/drivers/radeonsi/si_shader.c --- src/gallium/drivers/radeonsi/si_compute.c | 3 +- src/gallium/drivers/radeonsi/si_pipe.c | 43 ++++++++++--------- src/gallium/drivers/radeonsi/si_pipe.h | 3 +- src/gallium/drivers/radeonsi/si_shader.c | 50 ++++++++++++++++++++--- src/gallium/drivers/radeonsi/si_shader.h | 5 ++- src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +- 6 files changed, 78 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 5009f69..8ebcb8d 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -130,7 +130,8 @@ static void *si_create_compute_state( for (i = 0; i < program->num_kernels; i++) { LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, code, header->num_bytes); - si_compile_llvm(sctx->screen, &program->kernels[i], mod); + si_compile_llvm(sctx->screen, &program->kernels[i], sctx->tm, + mod); LLVMDisposeModule(mod); } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 26182c2..e761d20 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -69,6 +69,11 @@ static void si_destroy_context(struct pipe_context *context) si_pm4_cleanup(sctx); r600_common_context_cleanup(&sctx->b); + +#if HAVE_LLVM >= 0x0306 + LLVMDisposeTargetMachine(sctx->tm); +#endif + FREE(sctx); } @@ -77,6 +82,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->b.ws; + LLVMTargetRef r600_target; +#if HAVE_LLVM >= 0x0306 + const char *triple = "amdgcn--"; +#else + const char 
*triple = "r600--"; +#endif int shader, i; if (sctx == NULL) @@ -167,6 +178,17 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * */ sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units; +#if HAVE_LLVM >= 0x0306 + /* Initialize LLVM TargetMachine */ + r600_target = radeon_llvm_get_r600_target(triple); + sctx->tm = LLVMCreateTargetMachine(r600_target, triple, + r600_get_llvm_processor_name(sscreen->b.family), + "+DumpCode,+vgpr-spilling", + LLVMCodeGenLevelDefault, + LLVMRelocDefault, + LLVMCodeModelDefault); +#endif + return &sctx->b.b; fail: si_destroy_context(&sctx->b.b); @@ -435,12 +457,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; -#if HAVE_LLVM >= 0x0306 - // r600_destroy_common_screen() frees sscreen, so we need to make - // sure to dispose the TargetMachine before we call it. - LLVMDisposeTargetMachine(sscreen->tm); -#endif - r600_destroy_common_screen(&sscreen->b); } @@ -498,12 +514,7 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); - LLVMTargetRef r600_target; -#if HAVE_LLVM >= 0x0306 - const char *triple = "amdgcn--"; -#else - const char *triple = "r600--"; -#endif + if (sscreen == NULL) { return NULL; } @@ -531,13 +542,5 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) /* Create the auxiliary context. This must be done last. 
*/ sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL); -#if HAVE_LLVM >= 0x0306 - /* Initialize LLVM TargetMachine */ - r600_target = radeon_llvm_get_r600_target(triple); - sscreen->tm = LLVMCreateTargetMachine(r600_target, triple, - r600_get_llvm_processor_name(sscreen->b.family), - "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, - LLVMCodeModelDefault); -#endif return &sscreen->b.b; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 059fe0d..63fe54f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -83,7 +83,6 @@ struct si_compute; struct si_screen { struct r600_common_screen b; - LLVMTargetMachineRef tm; }; struct si_sampler_view { @@ -200,6 +199,8 @@ struct si_context { struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; + LLVMTargetMachineRef tm; + /* SI state handling */ union si_state queued; union si_state emitted; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 81ce3f6..a86d4b1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -72,6 +72,7 @@ struct si_shader_context int param_streamout_offset[4]; int param_vertex_id; int param_instance_id; + LLVMTargetMachineRef tm; LLVMValueRef const_md; LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS]; LLVMValueRef ddxy_lds; @@ -2638,13 +2639,13 @@ int si_shader_binary_read(struct si_screen *sscreen, } int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, - LLVMModuleRef mod) + LLVMTargetMachineRef tm, LLVMModuleRef mod) { int r = 0; bool dump = r600_can_dump_shader(&sscreen->b, shader->selector ? 
shader->selector->tokens : NULL); r = radeon_llvm_compile(mod, &shader->binary, - r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm); + r600_get_llvm_processor_name(sscreen->b.family), dump, tm); if (r) { return r; @@ -2732,7 +2733,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n"); r = si_compile_llvm(sscreen, si_shader_ctx->shader, - bld_base->base.gallivm->module); + si_shader_ctx->tm, bld_base->base.gallivm->module); radeon_llvm_dispose(&si_shader_ctx->radeon_bld); @@ -2740,7 +2741,45 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, return r; } -int si_shader_create(struct si_screen *sscreen, struct si_shader *shader) +static void si_dump_key(unsigned shader, union si_shader_key *key) +{ + int i; + + fprintf(stderr, "SHADER KEY\n"); + + switch (shader) { + case PIPE_SHADER_VERTEX: + fprintf(stderr, " instance_divisors = {"); + for (i = 0; i < Elements(key->vs.instance_divisors); i++) + fprintf(stderr, !i ? 
"%u" : ", %u", + key->vs.instance_divisors[i]); + fprintf(stderr, "}\n"); + + if (key->vs.as_es) + fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n", + key->vs.gs_used_inputs); + fprintf(stderr, " as_es = %u\n", key->vs.as_es); + break; + + case PIPE_SHADER_GEOMETRY: + break; + + case PIPE_SHADER_FRAGMENT: + fprintf(stderr, " export_16bpc = 0x%X\n", key->ps.export_16bpc); + fprintf(stderr, " last_cbuf = %u\n", key->ps.last_cbuf); + fprintf(stderr, " color_two_side = %u\n", key->ps.color_two_side); + fprintf(stderr, " alpha_func = %u\n", key->ps.alpha_func); + fprintf(stderr, " alpha_to_one = %u\n", key->ps.alpha_to_one); + fprintf(stderr, " poly_stipple = %u\n", key->ps.poly_stipple); + break; + + default: + assert(0); + } +} + +int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, + struct si_shader *shader) { struct si_shader_selector *sel = shader->selector; struct tgsi_token *tokens = sel->tokens; @@ -2812,6 +2851,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader) si_shader_ctx.shader = shader; si_shader_ctx.type = tgsi_get_processor_type(tokens); si_shader_ctx.screen = sscreen; + si_shader_ctx.tm = tm; switch (si_shader_ctx.type) { case TGSI_PROCESSOR_VERTEX: @@ -2867,7 +2907,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader) radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); mod = bld_base->base.gallivm->module; - r = si_compile_llvm(sscreen, shader, mod); + r = si_compile_llvm(sscreen, shader, tm, mod); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); goto out; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 551c7dc..0727b53 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -181,9 +181,10 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) } /* radeonsi_shader.c */ -int si_shader_create(struct si_screen *sscreen, struct si_shader 
*shader); +int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, + struct si_shader *shader); int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, - LLVMModuleRef mod); + LLVMTargetMachineRef tm, LLVMModuleRef mod); void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5b46336..ba3b31e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -398,6 +398,7 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx, static int si_shader_select(struct pipe_context *ctx, struct si_shader_selector *sel) { + struct si_context *sctx = (struct si_context *)ctx; union si_shader_key key; struct si_shader * shader = NULL; int r; @@ -437,7 +438,8 @@ static int si_shader_select(struct pipe_context *ctx, shader->next_variant = sel->current; sel->current = shader; - r = si_shader_create((struct si_screen*)ctx->screen, shader); + r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm, + shader); if (unlikely(r)) { R600_ERR("Failed to build shader variant (type=%u) %d\n", sel->type, r); From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): xmlpool: don't forget to ship the MOS Message-ID: <20150409112723.49EA5761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: f3b2698f6a0d3c74896d00630957deec018ebada URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3b2698f6a0d3c74896d00630957deec018ebada Author: Emil Velikov Date: Sun Mar 29 13:46:31 2015 +0100 xmlpool: don't forget to ship the MOS This will allow us to finally remove python 
from the build time dependencies list. Considering that you're building from a release tarball of course :-) Cc: Bernd Kuhls Reported-by: Bernd Kuhls Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Matt Turner (cherry picked from commit a665b9b3c89095923cf2251895afc69c9f79aafe) --- src/mesa/drivers/dri/common/xmlpool/Makefile.am | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile.am b/src/mesa/drivers/dri/common/xmlpool/Makefile.am index 5557716..9700499 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am +++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am @@ -52,7 +52,14 @@ POT=xmlpool.pot .PHONY: all clean pot po mo -EXTRA_DIST = gen_xmlpool.py options.h t_options.h $(POS) SConscript +EXTRA_DIST = \ + gen_xmlpool.py \ + options.h \ + t_options.h \ + $(POS) \ + $(MOS) \ + SConscript + BUILT_SOURCES = options.h CLEANFILES = $(MOS) options.h From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): i965: Add forgotten multi-stream code to Gen8 SOL state. Message-ID: <20150409112723.6A0F3761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 11e7ae07417870241597a306ba202973088cbe57 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11e7ae07417870241597a306ba202973088cbe57 Author: Kenneth Graunke Date: Thu Mar 26 17:21:10 2015 -0700 i965: Add forgotten multi-stream code to Gen8 SOL state. Fixes Piglit's arb_gpu_shader5-xfb-streams-without-invocations. 
Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit f368d0fa1fe37a58780ee555d4a9ccf15474782b) --- src/mesa/drivers/dri/i965/gen8_sol_state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index 1f122ec..d98a226 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -128,6 +128,15 @@ gen8_upload_3dstate_streamout(struct brw_context *brw, bool active, dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT; dw2 |= (urb_entry_read_length - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 |= urb_entry_read_offset << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT; + + dw2 |= urb_entry_read_offset << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT; + + dw2 |= urb_entry_read_offset << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT; + dw2 |= (urb_entry_read_length - 1) << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT; + /* Set buffer pitches; 0 means unbound. */ if (xfb_obj->Buffers[0]) dw3 |= linked_xfb_info->BufferStride[0] * 4; From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling Message-ID: <20150409112723.821A5761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: b7c44cd5abcd2d6dd7b525368ed93f0f35d87644 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b7c44cd5abcd2d6dd7b525368ed93f0f35d87644 Author: Ilia Mirkin Date: Sun Apr 5 17:40:44 2015 -0400 nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling We limit y-tiling to 0x20 when depth is involved. 
However the function is run for each miplevel, and the hardware expects miplevel 0 to have the highest tiling settings. Perform the y-tiling limit on all levels of a 3d texture, not just the ones that have depth. Fixes: texelFetch fs sampler3D 98x129x1-98x129x9 Signed-off-by: Ilia Mirkin Tested-by: Nick Tenney # GT216 Cc: "10.4 10.5" (cherry picked from commit ae720c66cb91c2640dfd6707446899694a24ab5b) --- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 14 ++++++++------ src/gallium/drivers/nouveau/nv50/nv50_resource.h | 3 ++- src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 2e41091..744a3a5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -29,7 +29,8 @@ #include "nv50/nv50_resource.h" uint32_t -nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz, + boolean is_3d) { uint32_t tile_mode = 0x000; @@ -41,7 +42,7 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) else if (ny > 8) tile_mode = 0x010; /* height 16 tiles */ - if (nz == 1) + if (!is_3d) return tile_mode; else if (tile_mode > 0x020) @@ -52,14 +53,15 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz) if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */ if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */ if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */ + if (nz > 1) return tile_mode | 0x100; /* depth 2 tiles */ - return tile_mode | 0x100; + return tile_mode; } static uint32_t -nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d) { - return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz); + return 
nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d); } static uint32_t @@ -304,7 +306,7 @@ nv50_miptree_init_layout_tiled(struct nv50_miptree *mt) lvl->offset = mt->total_size; - lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d); + lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d); tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ tsy = NV50_TILE_SIZE_Y(lvl->tile_mode); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h index c06daa3..36d70d8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h @@ -34,7 +34,8 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen); #endif /* __NVC0_RESOURCE_H__ */ uint32_t -nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz); +nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz, + boolean is_3d); struct nv50_miptree_level { uint32_t offset; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 1beda7d..fc75fc6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -29,9 +29,9 @@ #include "nvc0/nvc0_resource.h" static uint32_t -nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz) +nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d) { - return nv50_tex_choose_tile_dims_helper(nx, ny, nz); + return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d); } static uint32_t @@ -211,7 +211,7 @@ nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt) lvl->offset = mt->total_size; - lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d); + lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d); tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */ tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode); From evelikov at 
kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): i965: Fix URB size for CHV Message-ID: <20150409112723.96CA67633A@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 42854fdf2c1123d10ba6915fc6fb32de093d7785 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=42854fdf2c1123d10ba6915fc6fb32de093d7785 Author: Ville Syrjälä Date: Mon Jan 19 16:08:31 2015 +0200 i965: Fix URB size for CHV Increase the device info .urb.size for CHV to match the default URB size (192kB). Reviewed-by: Kenneth Graunke Signed-off-by: Ville Syrjälä (cherry picked from commit 970dc2360372a7859691d690bd2f1976c3c97fb0) --- src/mesa/drivers/dri/i965/brw_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 3c3c564..ba65584 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -241,7 +241,7 @@ static const struct brw_device_info brw_device_info_chv = { .max_gs_threads = 80, .max_wm_threads = 128, .urb = { - .size = 128, + .size = 192, .min_vs_entries = 34, .max_vs_entries = 640, .max_gs_entries = 256, From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when compilation fails v2 Message-ID: <20150409112723.3FE03761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 88e05a251fba232e2313e4bf81d2430ecdcadf10 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=88e05a251fba232e2313e4bf81d2430ecdcadf10 Author: Tom Stellard Date: Tue Mar 24 17:17:22 2015 +0000 clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when compilation fails v2 v2: - Don't use _errs map Cc: 10.5 10.4 Reviewed-by: Francisco
Jerez (cherry picked from commit fda7558057a301a5a0ee1cb4d68f09ea39b03bb3) --- src/gallium/state_trackers/clover/core/program.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 8553ca7..c07548c 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -90,6 +90,8 @@ cl_build_status program::build_status(const device &dev) const { if (_binaries.count(&dev)) return CL_BUILD_SUCCESS; + else if (_logs.count(&dev)) + return CL_BUILD_ERROR; else return CL_BUILD_NONE; } From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): nv50: allocate more offset space for occlusion queries Message-ID: <20150409112723.54A5B761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 71367a1bb4003434e12c229748afc7e89688e67c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=71367a1bb4003434e12c229748afc7e89688e67c Author: Ilia Mirkin Date: Fri Apr 3 23:57:43 2015 -0400 nv50: allocate more offset space for occlusion queries Commit 1a170980a09 started writing to q->data[4]/[5] but kept the per-query space at 16, which meant that in some cases we would write past the end of the buffer. Rotate by 32, like nvc0 does. This ensures that we always have 32 bytes in front of us, and the data writes will go within the allocated space. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89679 Signed-off-by: Ilia Mirkin Tested-by: Nick Tenney Reviewed-by: Samuel Pitoiset Reviewed-by: Tobias Klausmann Cc: "10.4 10.5" (cherry picked from commit ba353935a392d2a43422f1d258456336b40b60ea) --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index e0671ce..c867bca 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -41,7 +41,7 @@ struct nv50_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base; - uint32_t offset; /* base + i * 16 */ + uint32_t offset; /* base + i * 32 */ boolean ready; boolean flushed; boolean is64bit; @@ -116,8 +116,8 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->type = type; if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 16; - q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */ + q->offset -= 32; + q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ } return (struct pipe_query *)q; @@ -150,8 +150,8 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) * initialized it to TRUE. 
*/ if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset += 16; - q->data += 16 / sizeof(*q->data); + q->offset += 32; + q->data += 32 / sizeof(*q->data); if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): i965: Do not render primitives in non-zero streams then TF is disabled Message-ID: <20150409112723.745E2761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: a02e05f0fa071a002edffa84b2fd34be78075795 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a02e05f0fa071a002edffa84b2fd34be78075795 Author: Iago Toral Quiroga Date: Mon Mar 9 15:17:03 2015 +0100 i965: Do not render primitives in non-zero streams then TF is disabled Haswell hardware seems to ignore Render Stream Select bits from 3DSTATE_STREAMOUT packet when the SOL stage is disabled even if the PRM says otherwise. Because of this, all primitives are sent down the pipeline for rasterization, which is wrong. If SOL is enabled, Render Stream Select is honored and primitives bound to non-zero streams are discarded after stream output. Since the only purpose of primitives sent to non-zero streams is to be recorded by transform feedback, we can simply discard all geometry bound to non-zero streams when transform feedback is disabled to prevent it from ever reaching the rasterization stage. Notice that this patch introduces a small change in the behavior we get when a geometry shader emits more vertices than the maximum declared: before, a vertex that was emitted to a non-zero stream when TF was disabled would still count for the purposes of checking that we don't exceed the maximum number of output vertices declared by the shader.
With this change, these vertices are completely ignored and won't increase the output vertex count, making more room for other (hopefully more useful) vertices. Fixes piglit test arb_gpu_shader5-emitstreamvertex_nodraw on Haswell and Broadwell. v2 (Ken): Drop is_haswell check in favor of doing this unconditionally. Broadwell needs the workaround as well, and it doesn't hurt to do it in general. Also tweak comments - the Haswell PRM does actually mention this ("Command Reference: Instructions" page 797). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83962 Reviewed-by: Kenneth Graunke Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit 2042a2f961a07e04eaca0347e42859c249325531) --- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 7a0ea3c..97cfd5d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -476,6 +476,19 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) { this->current_annotation = "emit vertex: safety check"; + /* Haswell and later hardware ignores the "Render Stream Select" bits + * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled, + * and instead sends all primitives down the pipeline for rasterization. + * If the SOL stage is enabled, "Render Stream Select" is honored and + * primitives bound to non-zero streams are discarded after stream output. + * + * Since the only purpose of primives sent to non-zero streams is to + * be recorded by transform feedback, we can simply discard all geometry + * bound to these streams when transform feedback is disabled. 
+ */ + if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0) + return; + /* To ensure that we don't output more vertices than the shader specified * using max_vertices, do the logic inside a conditional of the form "if * (vertex_count < MAX)" From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): nv50/ir: take postFactor into account when doing peephole optimizations Message-ID: <20150409112723.0583A7635A@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: c8d962c205bc5d6f81f3d083d90ecaf66f78b1e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8d962c205bc5d6f81f3d083d90ecaf66f78b1e8 Author: Ilia Mirkin Date: Wed Mar 25 18:00:00 2015 -0400 nv50/ir: take postFactor into account when doing peephole optimizations Multiply operations can have a post-factor on them, which other ops don't support. Only perform the peephole optimizations when there is no post-factor involved. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89758 Cc: "10.4 10.5" Signed-off-by: Ilia Mirkin (cherry picked from commit 49b86007aa2bb599ada6cdbed7ff56246917f12e) --- src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 21d20ca..9e2a933 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -422,7 +422,9 @@ ConstantFolding::expr(Instruction *i, b->data.f32 = 0.0f; } switch (i->dType) { - case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; + case TYPE_F32: + res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); + break; case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; case TYPE_S32: if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { @@ -550,6 +552,7 @@ ConstantFolding::expr(Instruction *i, i->src(0).mod = Modifier(0); i->src(1).mod = Modifier(0); + i->postFactor = 0; i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); i->setSrc(1, NULL); @@ -653,7 +656,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, Instruction *insn; Instruction *mul1 = NULL; // mul1 before mul2 int e = 0; - float f = imm2.reg.data.f32; + float f = imm2.reg.data.f32 * exp2f(mul2->postFactor); ImmediateValue imm1; assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); @@ -753,9 +756,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; i->setSrc(0, new_ImmediateValue(prog, 0u)); i->src(0).mod = Modifier(0); + i->postFactor = 0; i->setSrc(1, NULL); } else - if (imm0.isInteger(1) || imm0.isInteger(-1)) { + if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) { if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = i->src(t).mod.getOp(); @@ -768,7 +772,7 @@ 
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->src(0).mod = 0; i->setSrc(1, NULL); } else - if (imm0.isInteger(2) || imm0.isInteger(-2)) { + if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) { if (imm0.isNegative()) i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); i->op = OP_ADD; From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): freedreno/a3xx: point size should not be divided by 2 Message-ID: <20150409112723.28A1A761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 9a3a2479d49ac2e22e4c4b253a20e75aca38a77a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9a3a2479d49ac2e22e4c4b253a20e75aca38a77a Author: Ilia Mirkin Date: Tue Mar 17 01:00:38 2015 -0400 freedreno/a3xx: point size should not be divided by 2 The division is probably a holdover from the days when the fixed point inline functions generated by headergen were broken. Also reduce the maximum point size to 4092 (vs 4096), which is what the blob does. Cc: "10.4 10.5" Signed-off-by: Ilia Mirkin (cherry picked from commit 7fc5da8b9392042b5f8a989d2afa49ea1944f9a9) --- src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c | 8 ++++---- src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index 4b926b5..345f688 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -50,7 +50,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, if (cso->point_size_per_vertex) { psize_min = util_get_min_point_size(cso); - psize_max = 8192; + psize_max = 4092; } else { /* Force the point size to be as if the vertex output was disabled. 
*/ psize_min = cso->point_size; @@ -67,9 +67,9 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, */ so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */ so->gras_su_point_minmax = - A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) | - A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2); - so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size/2); + A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) | + A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max); + so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size); so->gras_su_poly_offset_scale = A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale); so->gras_su_poly_offset_offset = diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1ce96d3..f4ae624 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -314,7 +314,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) case PIPE_CAPF_MAX_LINE_WIDTH_AA: case PIPE_CAPF_MAX_POINT_WIDTH: case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 8192.0f; + return 4092.0f; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): i965: Fix instanced geometry shaders on Gen8+. Message-ID: <20150409112723.5EF64761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: b6ce705dac84ee95a640ba5828864681ec2f2887 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6ce705dac84ee95a640ba5828864681ec2f2887 Author: Kenneth Graunke Date: Sat Apr 4 02:00:52 2015 -0700 i965: Fix instanced geometry shaders on Gen8+. Jordan added this in commit 741782b5948bb3d01d699f062a37513c2e73b076 for Gen7 platforms. I missed this when adding the Broadwell code. 
Fixes Piglit's spec/arb_gpu_shader5/invocation-id-{basic,in-separate-gs} with MESA_EXTENSION_OVERRIDE=GL_ARB_gpu_shader5 set. Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen Reviewed-by: Chris Forbes Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit f9e5dc0a85df8dbfb8213ff772dfeb218972db12) --- src/mesa/drivers/dri/i965/gen8_gs_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 95cc123..46b9713 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -82,6 +82,8 @@ gen8_upload_gs_state(struct brw_context *brw) uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords << GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | brw->gs.prog_data->dispatch_mode | + ((brw->gs.prog_data->invocations - 1) << + GEN7_GS_INSTANCE_CONTROL_SHIFT) | GEN6_GS_STATISTICS_ENABLE | (brw->gs.prog_data->include_primitive_id ? GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): nouveau: synchronize "scratch runout" destruction with the command stream Message-ID: <20150409112723.8C0FC761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 64bb117f6dc80103b7be6a3971a93195dc5f1917 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64bb117f6dc80103b7be6a3971a93195dc5f1917 Author: Marcin Ślusarz Date: Tue Mar 31 22:04:31 2015 +0200 nouveau: synchronize "scratch runout" destruction with the command stream When nvc0_push_vbo calls nouveau_scratch_done it does not mean scratch buffers can be freed immediately. It means "when hardware advances to this place in the command stream the scratch buffers can be freed". To fix it, just postpone scratch runout destruction after current fence is signalled. The bug existed for a very long time.
Nobody noticed, because "scratch runout" code path is rarely executed. Fixes hang at the very beginning of first mission in "Serious Sam 3" on nve7/gk107. It manifested as: nouveau E[ PFIFO][0000:01:00.0] read fault at 0x000a9e0000 [PTE] from GR/GPC0/PE_2 on channel 0x007f853000 [Sam3[17056]] Cc: "10.4 10.5" Reviewed-by: Ilia Mirkin (cherry picked from commit f9e2295560f9b4869fa2a94933c1881ec7970af4) --- src/gallium/drivers/nouveau/nouveau_buffer.c | 48 ++++++++++++++++--------- src/gallium/drivers/nouveau/nouveau_context.h | 8 +++-- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 49ff100..32fa65c 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -846,17 +846,28 @@ nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo, 4096, size, NULL, pbo); } +static void +nouveau_scratch_unref_bos(void *d) +{ + struct runout *b = d; + int i; + + for (i = 0; i < b->nr; ++i) + nouveau_bo_ref(NULL, &b->bo[i]); + + FREE(b); +} + void nouveau_scratch_runout_release(struct nouveau_context *nv) { - if (!nv->scratch.nr_runout) + if (!nv->scratch.runout) + return; + + if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos, + nv->scratch.runout)) return; - do { - --nv->scratch.nr_runout; - nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]); - } while (nv->scratch.nr_runout); - FREE(nv->scratch.runout); nv->scratch.end = 0; nv->scratch.runout = NULL; } @@ -868,21 +879,26 @@ static INLINE boolean nouveau_scratch_runout(struct nouveau_context *nv, unsigned size) { int ret; - const unsigned n = nv->scratch.nr_runout++; + unsigned n; - nv->scratch.runout = REALLOC(nv->scratch.runout, - (n + 0) * sizeof(*nv->scratch.runout), - (n + 1) * sizeof(*nv->scratch.runout)); - nv->scratch.runout[n] = NULL; - - ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout[n], 
size); + if (nv->scratch.runout) + n = nv->scratch.runout->nr; + else + n = 0; + nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 : + (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)), + sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *)); + nv->scratch.runout->nr = n + 1; + nv->scratch.runout->bo[n] = NULL; + + ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size); if (!ret) { - ret = nouveau_bo_map(nv->scratch.runout[n], 0, NULL); + ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL); if (ret) - nouveau_bo_ref(NULL, &nv->scratch.runout[--nv->scratch.nr_runout]); + nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]); } if (!ret) { - nv->scratch.current = nv->scratch.runout[n]; + nv->scratch.current = nv->scratch.runout->bo[n]; nv->scratch.offset = 0; nv->scratch.end = size; nv->scratch.map = nv->scratch.current->map; diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h index 14608d3..c2ba015 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.h +++ b/src/gallium/drivers/nouveau/nouveau_context.h @@ -40,8 +40,10 @@ struct nouveau_context { unsigned end; struct nouveau_bo *bo[NOUVEAU_MAX_SCRATCH_BUFS]; struct nouveau_bo *current; - struct nouveau_bo **runout; - unsigned nr_runout; + struct runout { + unsigned nr; + struct nouveau_bo *bo[0]; + } *runout; unsigned bo_size; } scratch; @@ -71,7 +73,7 @@ static INLINE void nouveau_scratch_done(struct nouveau_context *nv) { nv->scratch.wrap = nv->scratch.id; - if (unlikely(nv->scratch.nr_runout)) + if (unlikely(nv->scratch.runout)) nouveau_scratch_runout_release(nv); } From evelikov at kemper.freedesktop.org Thu Apr 9 11:27:23 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 9 Apr 2015 04:27:23 -0700 (PDT) Subject: Mesa (10.5): configure.ac: error out if python/ mako is not found when required Message-ID: <20150409112723.A1A51761C1@kemper.freedesktop.org> Module: Mesa Branch: 10.5 
Commit: b3bb6b19637561171f286af7342266cac63b8fcd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3bb6b19637561171f286af7342266cac63b8fcd Author: Emil Velikov Date: Mon Mar 23 17:49:24 2015 +0000 configure.ac: error out if python/mako is not found when required In case of using a distribution tarball (or a dirty git tree) one can have the generated sources locally. Make configure.ac error out otherwise, to alert that about the unmet requirement(s) of python/mako. v2: Check only for a single file for each dependency. Suggested-by: Matt Turner Signed-off-by: Emil Velikov Reviewed-by: Matt Turner (cherry picked from commit 4008975e6f4b2e15fceed4f07d8ec763dd0949d1) --- configure.ac | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 20e6d62..101cf66 100644 --- a/configure.ac +++ b/configure.ac @@ -115,8 +115,17 @@ if test "x$INDENT" != "xcat"; then fi AX_CHECK_PYTHON_MAKO_MODULE($PYTHON_MAKO_REQUIRED) -if test -n "$PYTHON2" -a "x$acv_mako_found" != "xyes"; then - AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found]) + +if test -z "$PYTHON2"; then + if test ! -f "$srcdir/src/util/format_srgb.c"; then + AC_MSG_ERROR([Python not found - unable to generate sources]) + fi +else + if test "x$acv_mako_found" = xno; then + if test ! -f "$srcdir/src/mesa/main/format_unpack.c"; then + AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found]) + fi + fi fi AC_PROG_INSTALL From vlee at kemper.freedesktop.org Thu Apr 9 17:42:07 2015 From: vlee at kemper.freedesktop.org (Vinson Lee) Date: Thu, 9 Apr 2015 10:42:07 -0700 (PDT) Subject: Mesa (master): gallivm: Fix build since llvm-3.7.0svn r234460. 
Message-ID: <20150409174207.BA9DA76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 50e9fa2ed69cb5f76f66231976ea789c0091a64d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=50e9fa2ed69cb5f76f66231976ea789c0091a64d Author: Vinson Lee Date: Wed Apr 8 22:51:57 2015 -0700 gallivm: Fix build since llvm-3.7.0svn r234460. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89963 Signed-off-by: Vinson Lee Reviewed-by: Tom Stellard --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 65d2896..b712915 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -97,7 +97,11 @@ private: uint64_t pos; public: +#if HAVE_LLVM >= 0x0307 + raw_debug_ostream() : raw_ostream(SK_FD), pos(0) { } +#else raw_debug_ostream() : pos(0) { } +#endif void write_impl(const char *Ptr, size_t Size); From kwg at kemper.freedesktop.org Fri Apr 10 09:23:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:13 -0700 (PDT) Subject: Mesa (master): nir: Make nir_lower_samplers take a gl_shader_stage, not a gl_program *. Message-ID: <20150410092313.EA57576332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 99264b7f37dc92bcb3a9ae226e00c9300414431c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=99264b7f37dc92bcb3a9ae226e00c9300414431c Author: Kenneth Graunke Date: Tue Apr 7 15:14:16 2015 -0700 nir: Make nir_lower_samplers take a gl_shader_stage, not a gl_program *. We don't actually need a gl_program struct. We only used it to translate prog->Target (i.e. GL_VERTEX_PROGRAM) to the gl_shader_stage (i.e. MESA_SHADER_VERTEX). We may as well just pass that. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_lower_samplers.cpp | 26 ++++++++++++-------------- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 17a9354..679911c 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1612,7 +1612,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, - struct gl_program *prog); + gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); void nir_lower_tex_projector(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 1e509a9..7a7cf85 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -36,11 +36,9 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program, const char *name, - const struct gl_program *prog) +get_sampler_index(struct gl_shader_program *shader_program, + gl_shader_stage stage, const char *name) { - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - unsigned location; if (!shader_program->UniformHash->get(location, name)) { linker_error(shader_program, @@ -48,7 +46,7 @@ get_sampler_index(struct gl_shader_program *shader_program, const char *name, return 0; } - if (!shader_program->UniformStorage[location].sampler[shader].active) { + if (!shader_program->UniformStorage[location].sampler[stage].active) { assert(0 && "cannot return a sampler"); linker_error(shader_program, "cannot return a sampler named %s, because it is not " @@ -57,12 +55,12 @@ get_sampler_index(struct gl_shader_program *shader_program, const char *name, return 0; } - return shader_program->UniformStorage[location].sampler[shader].index; + return 
shader_program->UniformStorage[location].sampler[stage].index; } static void lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, - const struct gl_program *prog, void *mem_ctx) + gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) return; @@ -133,7 +131,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, } } - instr->sampler_index += get_sampler_index(shader_program, name, prog); + instr->sampler_index += get_sampler_index(shader_program, stage, name); instr->sampler = NULL; } @@ -141,7 +139,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, typedef struct { void *mem_ctx; struct gl_shader_program *shader_program; - struct gl_program *prog; + gl_shader_stage stage; } lower_state; static bool @@ -152,7 +150,7 @@ lower_block_cb(nir_block *block, void *_state) nir_foreach_instr(block, instr) { if (instr->type == nir_instr_type_tex) { nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->prog, + lower_sampler(tex_instr, state->shader_program, state->stage, state->mem_ctx); } } @@ -162,23 +160,23 @@ lower_block_cb(nir_block *block, void *_state) static void lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, - struct gl_program *prog) + gl_shader_stage stage) { lower_state state; state.mem_ctx = ralloc_parent(impl); state.shader_program = shader_program; - state.prog = prog; + state.stage = stage; nir_foreach_block(impl, lower_block_cb, &state); } extern "C" void nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, - struct gl_program *prog) + gl_shader_stage stage) { nir_foreach_overload(shader, overload) { if (overload->impl) - lower_impl(overload->impl, shader_program, prog); + lower_impl(overload->impl, shader_program, stage); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index a874337..7c56290 100644 
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -141,7 +141,7 @@ fs_visitor::emit_nir_code() nir_validate_shader(nir); if (shader_prog) { - nir_lower_samplers(nir, shader_prog, shader->base.Program); + nir_lower_samplers(nir, shader_prog, stage); nir_validate_shader(nir); } From kwg at kemper.freedesktop.org Fri Apr 10 09:23:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:14 -0700 (PDT) Subject: Mesa (master): nir: Constify nir_lower_sampler's gl_shader_program pointer. Message-ID: <20150410092314.05C8F7633A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 500da98e0b96d211eab27629696ef50c47caaff9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=500da98e0b96d211eab27629696ef50c47caaff9 Author: Kenneth Graunke Date: Tue Apr 7 15:50:54 2015 -0700 nir: Constify nir_lower_sampler's gl_shader_program pointer. Now that we're not generating linker errors, we don't actually modify this. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_lower_samplers.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 679911c..e844e4d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1611,7 +1611,7 @@ void nir_lower_alu_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, - struct gl_shader_program *shader_program, + const struct gl_shader_program *shader_program, gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 90e023a..cf8ab83 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -36,7 +36,7 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program,
+get_sampler_index(const struct gl_shader_program *shader_program, gl_shader_stage stage, const char *name) { unsigned location; @@ -54,7 +54,7 @@ get_sampler_index(struct gl_shader_program *shader_program, } static void -lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) @@ -133,7 +133,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, typedef struct { void *mem_ctx; - struct gl_shader_program *shader_program; + const struct gl_shader_program *shader_program; gl_shader_stage stage; } lower_state; @@ -154,7 +154,7 @@ lower_block_cb(nir_block *block, void *_state) } static void -lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, gl_shader_stage stage) { lower_state state; @@ -167,7 +167,7 @@ lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, } extern "C" void -nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, +nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, gl_shader_stage stage) { nir_foreach_overload(shader, overload) { From kwg at kemper.freedesktop.org Fri Apr 10 09:23:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:14 -0700 (PDT) Subject: Mesa (master): i965: Change brw_shader to gl_shader in brw_link_shader(). Message-ID: <20150410092314.0F74676332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 10d85ffc5a74bbce4decd7fe2aedd856ce7d39cb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=10d85ffc5a74bbce4decd7fe2aedd856ce7d39cb Author: Kenneth Graunke Date: Tue Apr 7 16:29:32 2015 -0700 i965: Change brw_shader to gl_shader in brw_link_shader(). 
Nothing actually wanted brw_shader fields - we just had to type shader->base all over the place for no reason. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_shader.cpp | 63 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 54d6d71..9fad02c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -129,15 +129,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; - struct brw_shader *shader = - (struct brw_shader *)shProg->_LinkedShaders[stage]; + struct gl_shader *shader = shProg->_LinkedShaders[stage]; if (!shader) continue; struct gl_program *prog = ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage), - shader->base.Name); + shader->Name); if (!prog) return false; prog->Parameters = _mesa_new_parameter_list(); @@ -147,19 +146,19 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) /* Temporary memory context for any new IR. */ void *mem_ctx = ralloc_context(NULL); - ralloc_adopt(mem_ctx, shader->base.ir); + ralloc_adopt(mem_ctx, shader->ir); bool progress; /* lower_packing_builtins() inserts arithmetic instructions, so it * must precede lower_instructions(). */ - brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->base.ir); - do_mat_op_to_vec(shader->base.ir); + brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->ir); + do_mat_op_to_vec(shader->ir); const int bitfield_insert = brw->gen >= 7 ? 
BITFIELD_INSERT_TO_BFM_BFI : 0; - lower_instructions(shader->base.ir, + lower_instructions(shader->ir, MOD_TO_FLOOR | DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | @@ -172,21 +171,21 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) * if-statements need to be flattened. */ if (brw->gen < 6) - lower_if_to_cond_assign(shader->base.ir, 16); + lower_if_to_cond_assign(shader->ir, 16); - do_lower_texture_projection(shader->base.ir); - brw_lower_texture_gradients(brw, shader->base.ir); - do_vec_index_to_cond_assign(shader->base.ir); - lower_vector_insert(shader->base.ir, true); + do_lower_texture_projection(shader->ir); + brw_lower_texture_gradients(brw, shader->ir); + do_vec_index_to_cond_assign(shader->ir); + lower_vector_insert(shader->ir, true); if (options->NirOptions == NULL) - brw_do_cubemap_normalize(shader->base.ir); - lower_offset_arrays(shader->base.ir); - brw_do_lower_unnormalized_offset(shader->base.ir); - lower_noise(shader->base.ir); - lower_quadop_vector(shader->base.ir, false); + brw_do_cubemap_normalize(shader->ir); + lower_offset_arrays(shader->ir); + brw_do_lower_unnormalized_offset(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); bool lowered_variable_indexing = - lower_variable_index_to_cond_assign(shader->base.ir, + lower_variable_index_to_cond_assign(shader->ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, @@ -197,23 +196,23 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) "back to very inefficient code generation\n"); } - lower_ubo_reference(&shader->base, shader->base.ir); + lower_ubo_reference(shader, shader->ir); do { progress = false; if (is_scalar_shader_stage(brw, stage)) { - brw_do_channel_expressions(shader->base.ir); - brw_do_vector_splitting(shader->base.ir); + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); } - progress = do_lower_jumps(shader->base.ir, true, true, + progress = 
do_lower_jumps(shader->ir, true, true, true, /* main return */ false, /* continue */ false /* loops */ ) || progress; - progress = do_common_optimization(shader->base.ir, true, true, + progress = do_common_optimization(shader->ir, true, true, options, ctx->Const.NativeIntegers) || progress; } while (progress); @@ -225,7 +224,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) * too late. At that point, the values for the built-in uniforms won't * get sent to the shader. */ - foreach_in_list(ir_instruction, node, shader->base.ir) { + foreach_in_list(ir_instruction, node, shader->ir) { ir_variable *var = node->as_variable(); if ((var == NULL) || (var->data.mode != ir_var_uniform) @@ -241,15 +240,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) } } - validate_ir_tree(shader->base.ir); + validate_ir_tree(shader->ir); - do_set_program_inouts(shader->base.ir, prog, shader->base.Stage); + do_set_program_inouts(shader->ir, prog, shader->Stage); - prog->SamplersUsed = shader->base.active_samplers; - prog->ShadowSamplers = shader->base.shadow_samplers; + prog->SamplersUsed = shader->active_samplers; + prog->ShadowSamplers = shader->shadow_samplers; _mesa_update_shader_textures_used(shProg, prog); - _mesa_reference_program(ctx, &shader->base.Program, prog); + _mesa_reference_program(ctx, &shader->Program, prog); brw_add_texrect_params(prog); @@ -259,15 +258,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) * to the permanent memory context, and free the temporary one (discarding any * junk we optimized away). 
*/ - reparent_ir(shader->base.ir, shader->base.ir); + reparent_ir(shader->ir, shader->ir); ralloc_free(mem_ctx); if (ctx->_Shader->Flags & GLSL_DUMP) { fprintf(stderr, "\n"); fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->base.Stage), + _mesa_shader_stage_to_string(shader->Stage), shProg->Name); - _mesa_print_ir(stderr, shader->base.ir, NULL); + _mesa_print_ir(stderr, shader->ir, NULL); fprintf(stderr, "\n"); } } From kwg at kemper.freedesktop.org Fri Apr 10 09:23:14 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:14 -0700 (PDT) Subject: Mesa (master): i965: Move brw_link_shader's GLSL IR transformations into a helper. Message-ID: <20150410092314.175C176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ae17f348502c0665fcead8daf8abaef8aa152b03 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae17f348502c0665fcead8daf8abaef8aa152b03 Author: Kenneth Graunke Date: Tue Apr 7 16:28:10 2015 -0700 i965: Move brw_link_shader's GLSL IR transformations into a helper. This function was getting a bit large and unwieldy. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_shader.cpp | 192 +++++++++++++++--------------- 1 file changed, 99 insertions(+), 93 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9fad02c..bf9aceb 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -120,6 +120,104 @@ brw_lower_packing_builtins(struct brw_context *brw, lower_packing_builtins(ir, ops); } +static void +process_glsl_ir(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_shader *shader) +{ + struct gl_context *ctx = &brw->ctx; + const struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + /* Temporary memory context for any new IR.
*/ + void *mem_ctx = ralloc_context(NULL); + + ralloc_adopt(mem_ctx, shader->ir); + + /* lower_packing_builtins() inserts arithmetic instructions, so it + * must precede lower_instructions(). + */ + brw_lower_packing_builtins(brw, shader->Stage, shader->ir); + do_mat_op_to_vec(shader->ir); + const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0; + lower_instructions(shader->ir, + MOD_TO_FLOOR | + DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2 | + bitfield_insert | + LDEXP_TO_ARITH); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (brw->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + + do_lower_texture_projection(shader->ir); + brw_lower_texture_gradients(brw, shader->ir); + do_vec_index_to_cond_assign(shader->ir); + lower_vector_insert(shader->ir, true); + if (options->NirOptions == NULL) + brw_do_cubemap_normalize(shader->ir); + lower_offset_arrays(shader->ir); + brw_do_lower_unnormalized_offset(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + + bool lowered_variable_indexing = + lower_variable_index_to_cond_assign(shader->ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform); + + if (unlikely(brw->perf_debug && lowered_variable_indexing)) { + perf_debug("Unsupported form of variable indexing in FS; falling " + "back to very inefficient code generation\n"); + } + + lower_ubo_reference(shader, shader->ir); + + bool progress; + do { + progress = false; + + if (is_scalar_shader_stage(brw, shader->Stage)) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } + + progress = do_lower_jumps(shader->ir, true, true, + true, /* main return */ + false, /* continue */ + false /* loops */ + ) || progress; + + progress = do_common_optimization(shader->ir, true, true, + options, ctx->Const.NativeIntegers) || progress; 
+ } while (progress); + + validate_ir_tree(shader->ir); + + /* Now that we've finished altering the linked IR, reparent any live IR back + * to the permanent memory context, and free the temporary one (discarding any + * junk we optimized away). + */ + reparent_ir(shader->ir, shader->ir); + ralloc_free(mem_ctx); + + if (ctx->_Shader->Flags & GLSL_DUMP) { + fprintf(stderr, "\n"); + fprintf(stderr, "GLSL IR for linked %s program %d:\n", + _mesa_shader_stage_to_string(shader->Stage), + shader_prog->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + fprintf(stderr, "\n"); + } +} + GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { @@ -127,8 +225,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) unsigned int stage; for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[stage]; struct gl_shader *shader = shProg->_LinkedShaders[stage]; if (!shader) @@ -143,79 +239,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); - /* Temporary memory context for any new IR. */ - void *mem_ctx = ralloc_context(NULL); - - ralloc_adopt(mem_ctx, shader->ir); - - bool progress; - - /* lower_packing_builtins() inserts arithmetic instructions, so it - * must precede lower_instructions(). - */ - brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->ir); - do_mat_op_to_vec(shader->ir); - const int bitfield_insert = brw->gen >= 7 - ? BITFIELD_INSERT_TO_BFM_BFI - : 0; - lower_instructions(shader->ir, - MOD_TO_FLOOR | - DIV_TO_MUL_RCP | - SUB_TO_ADD_NEG | - EXP_TO_EXP2 | - LOG_TO_LOG2 | - bitfield_insert | - LDEXP_TO_ARITH); - - /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, - * if-statements need to be flattened. 
- */ - if (brw->gen < 6) - lower_if_to_cond_assign(shader->ir, 16); - - do_lower_texture_projection(shader->ir); - brw_lower_texture_gradients(brw, shader->ir); - do_vec_index_to_cond_assign(shader->ir); - lower_vector_insert(shader->ir, true); - if (options->NirOptions == NULL) - brw_do_cubemap_normalize(shader->ir); - lower_offset_arrays(shader->ir); - brw_do_lower_unnormalized_offset(shader->ir); - lower_noise(shader->ir); - lower_quadop_vector(shader->ir, false); - - bool lowered_variable_indexing = - lower_variable_index_to_cond_assign(shader->ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform); - - if (unlikely(brw->perf_debug && lowered_variable_indexing)) { - perf_debug("Unsupported form of variable indexing in FS; falling " - "back to very inefficient code generation\n"); - } - - lower_ubo_reference(shader, shader->ir); - - do { - progress = false; - - if (is_scalar_shader_stage(brw, stage)) { - brw_do_channel_expressions(shader->ir); - brw_do_vector_splitting(shader->ir); - } - - progress = do_lower_jumps(shader->ir, true, true, - true, /* main return */ - false, /* continue */ - false /* loops */ - ) || progress; - - progress = do_common_optimization(shader->ir, true, true, - options, ctx->Const.NativeIntegers) - || progress; - } while (progress); + process_glsl_ir(brw, shProg, shader); /* Make a pass over the IR to add state references for any built-in * uniforms that are used. This has to be done now (during linking). 
@@ -240,8 +264,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) } } - validate_ir_tree(shader->ir); - do_set_program_inouts(shader->ir, prog, shader->Stage); prog->SamplersUsed = shader->active_samplers; @@ -253,22 +275,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); _mesa_reference_program(ctx, &prog, NULL); - - /* Now that we've finished altering the linked IR, reparent any live IR back - * to the permanent memory context, and free the temporary one (discarding any - * junk we optimized away). - */ - reparent_ir(shader->ir, shader->ir); - ralloc_free(mem_ctx); - - if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "\n"); - fprintf(stderr, "GLSL IR for linked %s program %d:\n", - _mesa_shader_stage_to_string(shader->Stage), - shProg->Name); - _mesa_print_ir(stderr, shader->ir, NULL); - fprintf(stderr, "\n"); - } } if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) { From kwg at kemper.freedesktop.org Fri Apr 10 09:23:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:13 -0700 (PDT) Subject: Mesa (master): nir: Constify prog_to_nir's gl_program pointer. Message-ID: <20150410092313.CAAAA76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d0f39a2fcd82da9b1e293ddfd2f5ea6e78d28df8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d0f39a2fcd82da9b1e293ddfd2f5ea6e78d28df8 Author: Kenneth Graunke Date: Tue Apr 7 15:07:33 2015 -0700 nir: Constify prog_to_nir's gl_program pointer. prog_to_nir should not modify the incoming Mesa IR program - just translate it. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/mesa/program/prog_to_nir.c | 4 ++-- src/mesa/program/prog_to_nir.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index b298d07..c738f50 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -43,7 +43,7 @@ */ struct ptn_compile { - struct gl_program *prog; + const struct gl_program *prog; nir_builder build; bool error; @@ -1052,7 +1052,7 @@ setup_registers_and_variables(struct ptn_compile *c) } struct nir_shader * -prog_to_nir(struct gl_program *prog, const nir_shader_compiler_options *options) +prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options) { struct ptn_compile *c; struct nir_shader *s; diff --git a/src/mesa/program/prog_to_nir.h b/src/mesa/program/prog_to_nir.h index 3c9b664..34e4cd1 100644 --- a/src/mesa/program/prog_to_nir.h +++ b/src/mesa/program/prog_to_nir.h @@ -28,7 +28,7 @@ extern "C" { #endif -struct nir_shader *prog_to_nir(struct gl_program *prog, +struct nir_shader *prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options); #ifdef __cplusplus From kwg at kemper.freedesktop.org Fri Apr 10 09:23:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:13 -0700 (PDT) Subject: Mesa (master): nir: Fix #include guards in shader_enums.h. Message-ID: <20150410092313.D6B0776332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: feafe703998dd88ebea131741326eb229956656a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=feafe703998dd88ebea131741326eb229956656a Author: Kenneth Graunke Date: Tue Apr 7 15:16:51 2015 -0700 nir: Fix #include guards in shader_enums.h. This header was originally going to be called pipeline.h, but it got renamed at the last minute. Make the include guards match. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/glsl/shader_enums.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 0e08bd3..7f59fdc 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -23,8 +23,8 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef PIPELINE_H -#define PIPELINE_H +#ifndef SHADER_ENUMS_H +#define SHADER_ENUMS_H /** * Bitflags for system values. @@ -167,4 +167,4 @@ enum glsl_interp_qualifier }; -#endif /* PIPELINE_H */ +#endif /* SHADER_ENUMS_H */ From kwg at kemper.freedesktop.org Fri Apr 10 09:23:13 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 10 Apr 2015 02:23:13 -0700 (PDT) Subject: Mesa (master): nir: Move gl_shader_stage enum from mtypes.h to shader_enums.h. Message-ID: <20150410092313.E057F76332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4b27391cadcc789a3befbd2b5a846012afa069b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b27391cadcc789a3befbd2b5a846012afa069b8 Author: Kenneth Graunke Date: Tue Apr 7 15:18:43 2015 -0700 nir: Move gl_shader_stage enum from mtypes.h to shader_enums.h. I want to use this in some code that doesn't currently include mtypes.h. It seems like a better place for it anyway. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/glsl/nir/nir.h | 1 + src/glsl/shader_enums.h | 17 +++++++++++++++++ src/mesa/main/mtypes.h | 19 ------------------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f9ca0f7..17a9354 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -34,6 +34,7 @@ #include "util/set.h" #include "util/bitset.h" #include "nir_types.h" +#include "glsl/shader_enums.h" #include #include "nir_opcodes.h" diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 7f59fdc..79e0f6b 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -27,6 +27,23 @@ #define SHADER_ENUMS_H /** + * Shader stages. Note that these will become 5 with tessellation. + * + * The order must match how shaders are ordered in the pipeline. + * The GLSL linker assumes that if i Module: Mesa Branch: master Commit: 709b88ccd8009d98142616cb53c2ad66ddcd52a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=709b88ccd8009d98142616cb53c2ad66ddcd52a9 Author: Kenneth Graunke Date: Tue Apr 7 15:46:56 2015 -0700 nir: Remove linker_error calls from nir_lower_samplers(). These should never happen. Plus, NIR passes really shouldn't be reporting linker errors - this is past link time. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/glsl/nir/nir_lower_samplers.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 7a7cf85..90e023a 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -41,17 +41,12 @@ get_sampler_index(struct gl_shader_program *shader_program, { unsigned location; if (!shader_program->UniformHash->get(location, name)) { - linker_error(shader_program, - "failed to find sampler named %s.\n", name); + assert(!"failed to find sampler"); return 0; } if (!shader_program->UniformStorage[location].sampler[stage].active) { - assert(0 && "cannot return a sampler"); - linker_error(shader_program, - "cannot return a sampler named %s, because it is not " - "used in this shader stage. This is a driver bug.\n", - name); + assert(!"cannot return a sampler"); return 0; } From vsyrjala at kemper.freedesktop.org Fri Apr 10 11:53:44 2015 From: vsyrjala at kemper.freedesktop.org (Ville Syrjala) Date: Fri, 10 Apr 2015 04:53:44 -0700 (PDT) Subject: Mesa (master): i965/disasm: Print the type after the swizzle also for 3src src operands Message-ID: <20150410115344.70A4176332@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 50db8bd1b5942a6577ab5ee399cae460fde761d4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=50db8bd1b5942a6577ab5ee399cae460fde761d4 Author: Ville Syrjälä Date: Tue Mar 31 15:01:24 2015 +0300 i965/disasm: Print the type after the swizzle also for 3src src operands The disassembly currently has the swizzle after the type for 3src source operands, and the other way around for 2src. Flip the type and swizzle around for 3src so that the output matches 2src. Reviewed-by: Matt Turner Signed-off-by: Ville Syrjälä
--- src/mesa/drivers/dri/i965/brw_disasm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index c41dde2..2630218 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -955,9 +955,9 @@ src0_3src(FILE *file, struct brw_context *brw, brw_inst *inst) string(file, "<0,1,0>"); else string(file, "<4,4,1>"); + err |= src_swizzle(file, brw_inst_3src_src0_swizzle(brw, inst)); err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(brw, inst), NULL); - err |= src_swizzle(file, brw_inst_3src_src0_swizzle(brw, inst)); return err; } @@ -981,9 +981,9 @@ src1_3src(FILE *file, struct brw_context *brw, brw_inst *inst) string(file, "<0,1,0>"); else string(file, "<4,4,1>"); + err |= src_swizzle(file, brw_inst_3src_src1_swizzle(brw, inst)); err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(brw, inst), NULL); - err |= src_swizzle(file, brw_inst_3src_src1_swizzle(brw, inst)); return err; } @@ -1008,9 +1008,9 @@ src2_3src(FILE *file, struct brw_context *brw, brw_inst *inst) string(file, "<0,1,0>"); else string(file, "<4,4,1>"); + err |= src_swizzle(file, brw_inst_3src_src2_swizzle(brw, inst)); err |= control(file, "src da16 reg type", three_source_reg_encoding, brw_inst_3src_src_type(brw, inst), NULL); - err |= src_swizzle(file, brw_inst_3src_src2_swizzle(brw, inst)); return err; } From jvesely at kemper.freedesktop.org Fri Apr 10 19:42:03 2015 From: jvesely at kemper.freedesktop.org (Jan Vesely) Date: Fri, 10 Apr 2015 12:42:03 -0700 (PDT) Subject: Mesa (master): gallivm: Fix build since llvm-3.7.0svn r234495 Message-ID: <20150410194204.003EE76338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f9048ee3c85ddaff0c44851b2523aaa2a554e059 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9048ee3c85ddaff0c44851b2523aaa2a554e059 Author: Nick Sarnie 
Date: Fri Apr 10 12:45:57 2015 -0400 gallivm: Fix build since llvm-3.7.0svn r234495 Revert 50e9fa2ed69cb5f76f66231976ea789c0091a64d as LLVM reverted their change. Signed-off-by: Nick Sarnie Reviewed-by: Jan Vesely --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index b712915..65d2896 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -97,11 +97,7 @@ private: uint64_t pos; public: -#if HAVE_LLVM >= 0x0307 - raw_debug_ostream() : raw_ostream(SK_FD), pos(0) { } -#else raw_debug_ostream() : pos(0) { } -#endif void write_impl(const char *Ptr, size_t Size); From jekstrand at kemper.freedesktop.org Sat Apr 11 00:21:22 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 10 Apr 2015 17:21:22 -0700 (PDT) Subject: Mesa (master): i965: Don't set NirOptions for stages that will use the vec4 backend. Message-ID: <20150411002122.4156676338@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c2a0600d5b0645533ba442b5ab879b23c2564a4d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c2a0600d5b0645533ba442b5ab879b23c2564a4d Author: Kenneth Graunke Date: Thu Apr 9 23:26:49 2015 -0700 i965: Don't set NirOptions for stages that will use the vec4 backend. We've started using NirOptions != NULL to mean "we're using NIR for this stage." However, when INTEL_USE_NIR=1, we set it for a bunch of stages that still use the vec4 backend, and thus definitely aren't using NIR. For example, if INTEL_USE_NIR=1 we disable the GLSL IR cubemap normalization pass, even for vertex shaders and geometry shaders. This is wrong, but breaks a very uncommon case. When I started deleting GLSL IR for stages where we claimed to be using NIR, this bug quickly became apparent. For now, only set it for fragment shaders, and vertex shaders if brw->scalar_vs is set.
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_context.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index f0de711..dfd0031 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -560,12 +560,6 @@ brw_initialize_context_constants(struct brw_context *brw) .lower_ffma = true, }; - bool use_nir_default[MESA_SHADER_STAGES]; - use_nir_default[MESA_SHADER_VERTEX] = false; - use_nir_default[MESA_SHADER_GEOMETRY] = false; - use_nir_default[MESA_SHADER_FRAGMENT] = false; - use_nir_default[MESA_SHADER_COMPUTE] = false; - /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX; @@ -579,9 +573,6 @@ brw_initialize_context_constants(struct brw_context *brw) (i == MESA_SHADER_FRAGMENT); ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false; ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true; - - if (brw_env_var_as_boolean("INTEL_USE_NIR", use_nir_default[i])) - ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options; } ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true; @@ -594,8 +585,14 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false; + + if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) + ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; } + if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) + ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; + /* ARB_viewport_array */ 
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) { ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS; From jekstrand at kemper.freedesktop.org Sat Apr 11 00:21:22 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 10 Apr 2015 17:21:22 -0700 (PDT) Subject: Mesa (master): i965: Use NIR by default for fragment shaders Message-ID: <20150411002122.49C2E7633A@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d47405eb707b9921f70454049677a9d504ee3fa6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d47405eb707b9921f70454049677a9d504ee3fa6 Author: Jason Ekstrand Date: Fri Apr 10 16:24:11 2015 -0700 i965: Use NIR by default for fragment shaders GLSL IR vs. NIR shader-db results on i965: total instructions in shared programs: 2889747 -> 2890782 (0.04%) instructions in affected programs: 2425446 -> 2426481 (0.04%) helped: 3698 HURT: 5341 GLSL IR vs. NIR shader-db results on g4x: total instructions in shared programs: 2547252 -> 2550440 (0.13%) instructions in affected programs: 1984482 -> 1987670 (0.16%) helped: 2844 HURT: 4776 GLSL IR vs. NIR shader-db results on Iron Lake: total instructions in shared programs: 4053381 -> 4063828 (0.26%) instructions in affected programs: 3026601 -> 3037048 (0.35%) helped: 4110 HURT: 8331 GAINED: 1287 LOST: 9 GLSL IR vs. NIR shader-db results on Sandy Bridge: total instructions in shared programs: 5307041 -> 5236666 (-1.33%) instructions in affected programs: 3442908 -> 3372533 (-2.04%) helped: 11829 HURT: 5604 GAINED: 33 LOST: 18 GLSL IR vs. NIR shader-db results on Ivy Bridge: total instructions in shared programs: 4926333 -> 4857017 (-1.41%) instructions in affected programs: 3144042 -> 3074726 (-2.20%) helped: 11559 HURT: 4774 GAINED: 46 LOST: 25 GLSL IR vs. NIR shader-db results on Bay Trail: total instructions in shared programs: 4926333 -> 4857017 (-1.41%) instructions in affected programs: 3144042 -> 3074726 (-2.20%) helped: 11559 HURT: 4774 GAINED: 46 LOST: 25 GLSL IR vs. 
NIR shader-db results on Haswell: total instructions in shared programs: 4392487 -> 4293476 (-2.25%) instructions in affected programs: 2800180 -> 2701169 (-3.54%) helped: 13073 HURT: 3383 GAINED: 46 LOST: 23 GLSL IR vs. NIR shader-db results on Broadwell (FS only): total instructions in shared programs: 4378113 -> 4283025 (-2.17%) instructions in affected programs: 2743209 -> 2648121 (-3.47%) helped: 12470 HURT: 3609 GAINED: 64 LOST: 27 Signed-off-by: Jason Ekstrand Acked-by: Matt Turner Acked-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index dfd0031..e52c44e 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -590,7 +590,7 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions = &nir_options; } - if (brw_env_var_as_boolean("INTEL_USE_NIR", false)) + if (brw_env_var_as_boolean("INTEL_USE_NIR", true)) ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions = &nir_options; /* ARB_viewport_array */ From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: minor cleanup Message-ID: <20150411165909.0632F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b91d987140ce54969cbf9e0a10ad6b3ad5d5a75d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b91d987140ce54969cbf9e0a10ad6b3ad5d5a75d Author: Rob Clark Date: Wed Apr 8 13:17:30 2015 -0400 gallium/ttn: minor cleanup Extract tgsi_dst->Index into a local.. split out from 'gallium/ttn: add support for temp arrays' for noise reduction.. 
Signed-off-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index da935a4..fcccdad 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -340,17 +340,18 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) { struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; nir_alu_dest dest; + unsigned index = tgsi_dst->Index; memset(&dest, 0, sizeof(dest)); if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { - dest.dest.reg.reg = c->temp_regs[tgsi_dst->Index].reg; - dest.dest.reg.base_offset = c->temp_regs[tgsi_dst->Index].offset; + dest.dest.reg.reg = c->temp_regs[index].reg; + dest.dest.reg.base_offset = c->temp_regs[index].offset; } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { - dest.dest.reg.reg = c->output_regs[tgsi_dst->Index].reg; - dest.dest.reg.base_offset = c->output_regs[tgsi_dst->Index].offset; + dest.dest.reg.reg = c->output_regs[index].reg; + dest.dest.reg.base_offset = c->output_regs[index].offset; } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) { - assert(tgsi_dst->Index == 0); + assert(index == 0); dest.dest.reg.reg = c->addr_reg; } From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add support for temp arrays Message-ID: <20150411165909.0FAC4761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 96c0f9328d315d6a5a0e83f920ad27791fd91b4f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=96c0f9328d315d6a5a0e83f920ad27791fd91b4f Author: Rob Clark Date: Tue Apr 7 11:38:23 2015 -0400 gallium/ttn: add support for temp arrays Since the rest of NIR really would rather have these as variables rather than registers, create a nir_variable per array. 
But rather than completely re-arrange ttn to be variable based rather than register based, keep the registers. In the cases where there is a matching var for the reg, ttn_emit_instruction will append the appropriate intrinsic to get things back from the shadow reg into the variable. NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give an array id. But those just kinda suck, and should really go away. AFAICT we don't get those from glsl. Might be an issue for some other state tracker. v2: rework to use load_var/store_var with deref chains v3: create new "burner" reg for temporarily holding the (potentially writemask'd) dest after each instruction; add load_var to initialize temporary dest in case not all components are overwritten v4: review comments: asserts and use ttn_src_for_indirect() in ttn_array_deref() so we can drop later patch converting to use vec1 for addr reg (since ttn_src_for_indirect() handles the imov to vec1 from tgsi addr component that we want) v5: rebase: new requirements about parent mem ctx for derefs Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 160 ++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index fcccdad..c897303 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -44,6 +44,7 @@ struct ttn_reg_info { /** nir register containing this TGSI index. */ nir_register *reg; + nir_variable *var; /** Offset (in vec4s) from the start of var for this TGSI index. 
*/ int offset; }; @@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c) unsigned i; if (file == TGSI_FILE_TEMPORARY) { - nir_register *reg; - if (c->scan->indirect_files & (1 << file)) { - reg = nir_local_reg_create(b->impl); - reg->num_components = 4; - reg->num_array_elems = array_size; + if (decl->Declaration.Array) { + /* for arrays, we create variables instead of registers: */ + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = glsl_array_type(glsl_vec4_type(), array_size); + var->data.mode = nir_var_global; + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); + + exec_list_push_tail(&b->shader->globals, &var->node); for (i = 0; i < array_size; i++) { - c->temp_regs[decl->Range.First + i].reg = reg; + /* point all the matching slots to the same var, + * with appropriate offset set, mostly just so + * we know what to do when tgsi does a non-indirect + * access + */ + c->temp_regs[decl->Range.First + i].reg = NULL; + c->temp_regs[decl->Range.First + i].var = var; c->temp_regs[decl->Range.First + i].offset = i; } } else { for (i = 0; i < array_size; i++) { - reg = nir_local_reg_create(b->impl); + nir_register *reg = nir_local_reg_create(b->impl); reg->num_components = 4; c->temp_regs[decl->Range.First + i].reg = reg; + c->temp_regs[decl->Range.First + i].var = NULL; c->temp_regs[decl->Range.First + i].offset = 0; } } @@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c) static nir_src * ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect); +/* generate either a constant or indirect deref chain for accessing an + * array variable. 
+ */ +static nir_deref_var * +ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr, + nir_variable *var, unsigned offset, + struct tgsi_ind_register *indirect) +{ + nir_deref_var *deref = nir_deref_var_create(instr, var); + nir_deref_array *arr = nir_deref_array_create(deref); + + arr->base_offset = offset; + arr->deref.type = glsl_get_array_element(var->type); + + if (indirect) { + arr->deref_array_type = nir_deref_array_type_indirect; + arr->indirect = *ttn_src_for_indirect(c, indirect); + } else { + arr->deref_array_type = nir_deref_array_type_direct; + } + + deref->deref.child = &arr->deref; + + return deref; +} + static nir_src ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, struct tgsi_ind_register *indirect) @@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, switch (file) { case TGSI_FILE_TEMPORARY: - src.reg.reg = c->temp_regs[index].reg; - src.reg.base_offset = c->temp_regs[index].offset; - if (indirect) - src.reg.indirect = ttn_src_for_indirect(c, indirect); + if (c->temp_regs[index].var) { + unsigned offset = c->temp_regs[index].offset; + nir_variable *var = c->temp_regs[index].var; + nir_intrinsic_instr *load; + + load = nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_var); + load->num_components = 4; + load->variables[0] = ttn_array_deref(c, load, var, offset, indirect); + + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + + src = nir_src_for_ssa(&load->dest.ssa); + + } else { + assert(!indirect); + src.reg.reg = c->temp_regs[index].reg; + } break; case TGSI_FILE_ADDRESS: @@ -345,8 +398,49 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) memset(&dest, 0, sizeof(dest)); if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { - dest.dest.reg.reg = c->temp_regs[index].reg; - dest.dest.reg.base_offset = c->temp_regs[index].offset; + if 
(c->temp_regs[index].var) { + nir_builder *b = &c->build; + nir_intrinsic_instr *load; + struct tgsi_ind_register *indirect = + tgsi_dst->Indirect ? &tgsi_fdst->Indirect : NULL; + nir_register *reg; + + /* this works, because TGSI will give us a base offset + * (in case of indirect index) that points back into + * the array. Access can be direct or indirect, we + * don't really care. Just create a one-shot dst reg + * that will get store_var'd back into the array var + * at the end of ttn_emit_instruction() + */ + reg = nir_local_reg_create(c->build.impl); + reg->num_components = 4; + dest.dest.reg.reg = reg; + dest.dest.reg.base_offset = 0; + + /* since the alu op might not write to all components + * of the temporary, we must first do a load_var to + * get the previous array elements into the register. + * This is one area that NIR could use a bit of + * improvement (or opt pass to clean up the mess + * once things are scalarized) + */ + + load = nir_intrinsic_instr_create(c->build.shader, + nir_intrinsic_load_var); + load->num_components = 4; + load->variables[0] = + ttn_array_deref(c, load, c->temp_regs[index].var, + c->temp_regs[index].offset, + indirect); + + load->dest = nir_dest_for_reg(reg); + + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + } else { + assert(!tgsi_dst->Indirect); + dest.dest.reg.reg = c->temp_regs[index].reg; + dest.dest.reg.base_offset = c->temp_regs[index].offset; + } } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { dest.dest.reg.reg = c->output_regs[index].reg; dest.dest.reg.base_offset = c->output_regs[index].offset; @@ -358,12 +452,28 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) dest.write_mask = tgsi_dst->WriteMask; dest.saturate = false; - if (tgsi_dst->Indirect) + if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) dest.dest.reg.indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); return dest; } +static nir_variable * +ttn_get_var(struct ttn_compile *c, 
struct tgsi_full_dst_register *tgsi_fdst) +{ + struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; + unsigned index = tgsi_dst->Index; + + if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { + /* we should not have an indirect when there is no var! */ + if (!c->temp_regs[index].var) + assert(!tgsi_dst->Indirect); + return c->temp_regs[index].var; + } + + return NULL; +} + static nir_ssa_def * ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) { @@ -1134,6 +1244,7 @@ ttn_emit_instruction(struct ttn_compile *c) struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; unsigned i; unsigned tgsi_op = tgsi_inst->Instruction.Opcode; + struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0]; if (tgsi_op == TGSI_OPCODE_END) return; @@ -1142,7 +1253,7 @@ ttn_emit_instruction(struct ttn_compile *c) for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) { src[i] = ttn_get_src(c, &tgsi_inst->Src[i]); } - nir_alu_dest dest = ttn_get_dest(c, &tgsi_inst->Dst[0]); + nir_alu_dest dest = ttn_get_dest(c, tgsi_dst); switch (tgsi_op) { case TGSI_OPCODE_RSQ: @@ -1332,6 +1443,25 @@ ttn_emit_instruction(struct ttn_compile *c) assert(!dest.dest.is_ssa); ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest))); } + + /* if the dst has a matching var, append store_global to move + * output from reg to var + */ + nir_variable *var = ttn_get_var(c, tgsi_dst); + if (var) { + unsigned index = tgsi_dst->Register.Index; + unsigned offset = c->temp_regs[index].offset; + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ? 
+ &tgsi_dst->Indirect : NULL; + + store->num_components = 4; + store->variables[0] = ttn_array_deref(c, store, var, offset, indirect); + store->src[0] = nir_src_for_reg(dest.dest.reg.reg); + + nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); + } } /** From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: split out helper to get texture info Message-ID: <20150411165909.1A1FA761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0b71451920e65c4dff049359cc6ebb6e9ce53773 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0b71451920e65c4dff049359cc6ebb6e9ce53773 Author: Rob Clark Date: Thu Apr 9 15:16:17 2015 -0400 gallium/ttn: split out helper to get texture info We'll need this as well for TXQ. Split this out first to reduce noise in the next patch. Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 112 ++++++++++++++++--------------- 1 file changed, 59 insertions(+), 53 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index c897303..7bd5532 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -867,58 +867,9 @@ ttn_endloop(struct ttn_compile *c) } static void -ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) +setup_texture_info(nir_tex_instr *instr, unsigned texture) { - nir_builder *b = &c->build; - struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; - nir_tex_instr *instr; - nir_texop op; - unsigned num_srcs; - - switch (tgsi_inst->Instruction.Opcode) { - case TGSI_OPCODE_TEX: - op = nir_texop_tex; - num_srcs = 1; - break; - case TGSI_OPCODE_TXP: - op = nir_texop_tex; - num_srcs = 2; - break; - case TGSI_OPCODE_TXB: - op = nir_texop_txb; - num_srcs = 2; - break; - case TGSI_OPCODE_TXL: - op = nir_texop_txl; 
- num_srcs = 2; - break; - case TGSI_OPCODE_TXF: - op = nir_texop_txf; - num_srcs = 1; - break; - case TGSI_OPCODE_TXD: - op = nir_texop_txd; - num_srcs = 3; - break; - default: - fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode); - abort(); - } - - if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || - tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - num_srcs++; - } - - instr = nir_tex_instr_create(b->shader, num_srcs); - instr->op = op; - - switch (tgsi_inst->Texture.Texture) { + switch (texture) { case TGSI_TEXTURE_1D: instr->sampler_dim = GLSL_SAMPLER_DIM_1D; break; @@ -985,10 +936,65 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) instr->is_shadow = true; break; default: - fprintf(stderr, "Unknown TGSI texture target %d\n", - tgsi_inst->Texture.Texture); + fprintf(stderr, "Unknown TGSI texture target %d\n", texture); abort(); } +} + +static void +ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_builder *b = &c->build; + struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; + nir_tex_instr *instr; + nir_texop op; + unsigned num_srcs; + + switch (tgsi_inst->Instruction.Opcode) { + case TGSI_OPCODE_TEX: + op = nir_texop_tex; + num_srcs = 1; + break; + case TGSI_OPCODE_TXP: + op = nir_texop_tex; + num_srcs = 2; + break; + case TGSI_OPCODE_TXB: + op = nir_texop_txb; + num_srcs = 2; + break; + case TGSI_OPCODE_TXL: + op = nir_texop_txl; + num_srcs = 2; + break; + case TGSI_OPCODE_TXF: + op = nir_texop_txf; + num_srcs = 1; + break; + case TGSI_OPCODE_TXD: + op = nir_texop_txd; + num_srcs = 3; + break; + + default: + fprintf(stderr, "unknown TGSI tex op 
%d\n", tgsi_inst->Instruction.Opcode); + abort(); + } + + if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { + num_srcs++; + } + + instr = nir_tex_instr_create(b->shader, num_srcs); + instr->op = op; + + setup_texture_info(instr, tgsi_inst->Texture.Texture); switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_1D: From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add TXQ support (v2) Message-ID: <20150411165909.27B49761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ca3ae90490d1df40d4a27c8a2d3171af528155b2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ca3ae90490d1df40d4a27c8a2d3171af528155b2 Author: Rob Clark Date: Thu Apr 9 15:41:31 2015 -0400 gallium/ttn: add TXQ support (v2) Split out from ttn_tex() since it is kind of a weird instruction that maps to two NIR opcodes, and it was cleaner this way. 
v2: query_levels doesn't take any args Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 59 ++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 7bd5532..9acf0b9 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1021,13 +1021,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) unsigned src_number = 0; - if (tgsi_inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { - instr->src[src_number].src = - nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W), - instr->coord_components, false)); - instr->src[src_number].src_type = nir_tex_src_coord; - src_number++; - } + instr->src[src_number].src = + nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W), + instr->coord_components, false)); + instr->src[src_number].src_type = nir_tex_src_coord; + src_number++; if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) { instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); @@ -1066,6 +1064,48 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) ttn_move_dest(b, dest, &instr->dest.ssa); } +/* TGSI_OPCODE_TXQ is actually two distinct operations: + * + * dst.x = texture\_width(unit, lod) + * dst.y = texture\_height(unit, lod) + * dst.z = texture\_depth(unit, lod) + * dst.w = texture\_levels(unit) + * + * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels + */ +static void +ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) +{ + nir_builder *b = &c->build; + struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; + nir_tex_instr *txs, *qlv; + + txs = nir_tex_instr_create(b->shader, 1); + txs->op = nir_texop_txs; + setup_texture_info(txs, tgsi_inst->Texture.Texture); + + qlv = nir_tex_instr_create(b->shader, 0); + qlv->op = nir_texop_query_levels; + 
setup_texture_info(qlv, tgsi_inst->Texture.Texture); + + assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER); + txs->sampler_index = tgsi_inst->Src[1].Register.Index; + qlv->sampler_index = tgsi_inst->Src[1].Register.Index; + + /* only single src, the lod: */ + txs->src[0].src = nir_src_for_ssa(ttn_channel(b, src[0], X)); + txs->src[0].src_type = nir_tex_src_lod; + + nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr); + + nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr); + + ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ); + ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W); +} + static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_ARL] = 0, [TGSI_OPCODE_MOV] = nir_op_fmov, @@ -1389,7 +1429,6 @@ ttn_emit_instruction(struct ttn_compile *c) case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXD: - case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXL2: case TGSI_OPCODE_TXB2: case TGSI_OPCODE_TXQ_LZ: @@ -1398,6 +1437,10 @@ ttn_emit_instruction(struct ttn_compile *c) ttn_tex(c, dest, src); break; + case TGSI_OPCODE_TXQ: + ttn_txq(c, dest, src); + break; + case TGSI_OPCODE_NOP: break; From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: fix TXD Message-ID: <20150411165909.3123A761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2faa878f13b414d3c6d1e4c3c7642f8db52d9550 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2faa878f13b414d3c6d1e4c3c7642f8db52d9550 Author: Rob Clark Date: Fri Apr 10 14:19:22 2015 -0400 gallium/ttn: fix TXD With TXD we also have the ddx/ddy sources (before the sampler). 
Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 9acf0b9..0e87164 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -948,7 +948,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; nir_tex_instr *instr; nir_texop op; - unsigned num_srcs; + unsigned num_srcs, samp = 1; switch (tgsi_inst->Instruction.Opcode) { case TGSI_OPCODE_TEX: @@ -974,6 +974,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) case TGSI_OPCODE_TXD: op = nir_texop_txd; num_srcs = 3; + samp = 3; break; default: @@ -1016,8 +1017,8 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) if (instr->is_array) instr->coord_components++; - assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER); - instr->sampler_index = tgsi_inst->Src[1].Register.Index; + assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER); + instr->sampler_index = tgsi_inst->Src[samp].Register.Index; unsigned src_number = 0; @@ -1045,6 +1046,19 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) src_number++; } + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) { + instr->src[src_number].src = + nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W), + instr->coord_components, false)); + instr->src[src_number].src_type = nir_tex_src_ddx; + src_number++; + instr->src[src_number].src = + nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W), + instr->coord_components, false)); + instr->src[src_number].src_type = nir_tex_src_ddy; + src_number++; + } + if (instr->is_shadow) { if (instr->coord_components < 3) instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z)); From robclark at 
kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): nir: split out lower_sub from lower_negate Message-ID: <20150411165909.47E8F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 58add76791459e023f82eab973719c71779dae9d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=58add76791459e023f82eab973719c71779dae9d Author: Rob Clark Date: Sat Apr 4 08:13:44 2015 -0400 nir: split out lower_sub from lower_negate Originally you had to have one or the other. But actually I don't want either. (Or rather I want whatever is the minimum # of instructions.) TODO: not sure where the best place to insert a check that driver hasn't set *both* lower_negate and lower_sub? Signed-off-by: Rob Clark --- src/glsl/nir/nir.h | 2 ++ src/glsl/nir/nir_opt_algebraic.py | 4 ++-- src/mesa/drivers/dri/i965/brw_context.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e844e4d..6531237 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1382,6 +1382,8 @@ typedef struct nir_shader_compiler_options { bool lower_fsqrt; /** lowers fneg and ineg to fsub and isub. */ bool lower_negate; + /** lowers fsub and isub to fadd+fneg and iadd+ineg. 
*/ + bool lower_sub; /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ bool lower_scmp; diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index a8c1745..319732d 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -192,8 +192,8 @@ optimizations = [ # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), - (('fsub', a, b), ('fadd', a, ('fneg', b)), '!options->lower_negate'), - (('isub', a, b), ('iadd', a, ('ineg', b)), '!options->lower_negate'), + (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), + (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), (('ineg', a), ('isub', 0, a), 'options->lower_negate'), (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index e52c44e..d4a7d3d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -558,6 +558,7 @@ brw_initialize_context_constants(struct brw_context *brw) * re-combine them as a later step. */ .lower_ffma = true, + .lower_sub = true, }; /* We want the GLSL compiler to emit code that uses condition codes */ From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): nir: fix bit of cargo-culting in lower_idiv Message-ID: <20150411165909.57130761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f59613561694cc4a4b81db8a73f8afe893dbacac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f59613561694cc4a4b81db8a73f8afe893dbacac Author: Rob Clark Date: Fri Apr 10 16:39:30 2015 -0400 nir: fix bit of cargo-culting in lower_idiv I guess I was looking too much at how lower_system_values worked when writing lower_idiv. 
Since ttn wasn't emitting load_var for sysvals and the only drivers using lower_idiv were using ttn, I think nothing was broken as a result. But might as well fix this before it becomes a problem. Signed-off-by: Rob Clark Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_lower_idiv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c index c2f08df..7b68032 100644 --- a/src/glsl/nir/nir_lower_idiv.c +++ b/src/glsl/nir/nir_lower_idiv.c @@ -152,6 +152,4 @@ nir_lower_idiv(nir_shader *shader) if (overload->impl) convert_impl(overload->impl); } - - exec_list_make_empty(&shader->system_values); } From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: handle txs and query_levels tex ops Message-ID: <20150411165909.B1024761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 715b2e0dbb88ef80880b8517f8fe822c26ef3be5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=715b2e0dbb88ef80880b8517f8fe822c26ef3be5 Author: Rob Clark Date: Thu Apr 9 20:32:14 2015 -0400 freedreno/ir3/nir: handle txs and query_levels tex ops These correspond to the tgsi TXQ opcode (plus sneak in a fix for two-sided color) Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 85 +++++++++++++++++++- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index d044c1a..f6f44e0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -99,6 +99,10 @@ struct ir3_compile { */ bool flat_bypass; + /* on a3xx, we need to add one to # of array levels: + */ + bool levels_add_one; + /* for looking up which system value is which */ unsigned sysval_semantics[8]; @@ -216,9 +220,11 @@ 
compile_init(struct ir3_shader_variant *so, } else if (ir3_shader_gpuid(so->shader) >= 400) { /* need special handling for "flat" */ ctx->flat_bypass = true; + ctx->levels_add_one = false; } else { /* no special handling for "flat" */ ctx->flat_bypass = false; + ctx->levels_add_one = true; } switch (so->type) { @@ -1375,6 +1381,63 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) split_dest(b, dst, sam); } +static void +emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam; + + dst = get_dst(ctx, &tex->dest, 1); + + sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, TGSI_WRITEMASK_Z, 0, + tex->sampler_index, tex->sampler_index, NULL, NULL); + + /* even though there is only one component, since it ends + * up in .z rather than .x, we need a split_dest() + */ + split_dest(b, dst, sam); + + /* The # of levels comes from getinfo.z. We need to add 1 to it, since + * the value in TEX_CONST_0 is zero-based. + */ + if (ctx->levels_add_one) + dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0); +} + +static void +emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction **dst, *sam, *lod; + unsigned flags, coords; + + tex_info(tex, &flags, &coords); + + dst = get_dst(ctx, &tex->dest, 4); + + compile_assert(ctx, tex->num_srcs == 1); + compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod); + + lod = get_src(ctx, &tex->src[0].src)[0]; + + sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags, + tex->sampler_index, tex->sampler_index, lod, NULL); + + split_dest(b, dst, sam); + + /* Array size actually ends up in .w rather than .z. This doesn't + * matter for miplevel 0, but for higher mips the value in z is + * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is + * returned, which means that we have to add 1 to it for arrays. 
+ */ + if (tex->is_array) { + if (ctx->levels_add_one) { + dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0); + } else { + dst[coords] = ir3_MOV(b, dst[3], TYPE_U32); + } + } +} static void emit_instr(struct ir3_compile *ctx, nir_instr *instr) @@ -1392,10 +1455,23 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr) case nir_instr_type_ssa_undef: emit_undef(ctx, nir_instr_as_ssa_undef(instr)); break; - case nir_instr_type_tex: - emit_tex(ctx, nir_instr_as_tex(instr)); + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + /* couple tex instructions get special-cased: + */ + switch (tex->op) { + case nir_texop_txs: + emit_tex_txs(ctx, tex); + break; + case nir_texop_query_levels: + emit_tex_query_levels(ctx, tex); + break; + default: + emit_tex(ctx, tex); + break; + } break; - + } case nir_instr_type_call: case nir_instr_type_jump: case nir_instr_type_phi: @@ -1490,7 +1566,8 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR * from the semantic name: */ - if (semantic_name == TGSI_SEMANTIC_COLOR) + if ((semantic_name == TGSI_SEMANTIC_COLOR) || + (semantic_name == TGSI_SEMANTIC_BCOLOR)) so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR; if (ctx->flat_bypass) { From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: handle system values Message-ID: <20150411165909.C0F4F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1b936bb9f8da72baaef5c7454e8bebb63bbe067a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b936bb9f8da72baaef5c7454e8bebb63bbe067a Author: Rob Clark Date: Fri Apr 10 15:57:29 2015 -0400 freedreno/ir3/nir: handle system values Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 53 ++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index f6f44e0..34598db 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -255,9 +255,14 @@ compile_init(struct ir3_shader_variant *so, if (lowered_tokens != tokens) free((void *)lowered_tokens); - so->first_immediate = ctx->s->num_uniforms; - /* for now, now driver params: */ - so->first_driver_param = so->first_immediate; + so->first_driver_param = so->first_immediate = ctx->s->num_uniforms; + + /* one (vec4) slot for vertex id base: */ + if (so->type == SHADER_VERTEX) + so->first_immediate++; + + /* reserve 4 (vec4) slots for ubo base addresses: */ + so->first_immediate += 4; return ctx; } @@ -1067,6 +1072,22 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) } } +static void add_sysval_input(struct ir3_compile *ctx, unsigned name, + struct ir3_instruction *instr) +{ + struct ir3_shader_variant *so = ctx->so; + unsigned r = regid(so->inputs_count, 0); + unsigned n = so->inputs_count++; + + so->inputs[n].semantic = ir3_semantic_name(name, 0); + so->inputs[n].compmask = 1; + so->inputs[n].regid = r; + so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT; + so->total_in++; + + ctx->block->inputs[r] = instr; +} + static void emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) { @@ -1128,6 +1149,32 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) b->outputs[n] = src[i]; } break; + case nir_intrinsic_load_base_vertex: + if (!ctx->basevertex) { + /* first four vec4 sysval's reserved for UBOs: */ + unsigned r = regid(ctx->so->first_driver_param + 4, 0); + ctx->basevertex = create_uniform(ctx, r); + add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX, + ctx->basevertex); + } + dst[0] = ctx->basevertex; + break; + case nir_intrinsic_load_vertex_id_zero_base: + if (!ctx->vertex_id) { + ctx->vertex_id = create_input(ctx->block, 
NULL, 0); + add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE, + ctx->vertex_id); + } + dst[0] = ctx->vertex_id; + break; + case nir_intrinsic_load_instance_id: + if (!ctx->instance_id) { + ctx->instance_id = create_input(ctx->block, NULL, 0); + add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID, + ctx->instance_id); + } + dst[0] = ctx->instance_id; + break; case nir_intrinsic_discard_if: case nir_intrinsic_discard: { struct ir3_instruction *cond, *kill; From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add support for system values Message-ID: <20150411165909.3BDEF761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fd65122a900a5779393faa0ede6737fafcb95a27 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd65122a900a5779393faa0ede6737fafcb95a27 Author: Rob Clark Date: Fri Apr 10 15:01:16 2015 -0400 gallium/ttn: add support for system values So far just the system values that freedreno supports, so we may add more later. Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 34 +++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0e87164..9d988b06 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -153,6 +153,8 @@ ttn_emit_declaration(struct ttn_compile *c) } else if (file == TGSI_FILE_ADDRESS) { c->addr_reg = nir_local_reg_create(b->impl); c->addr_reg->num_components = 4; + } else if (file == TGSI_FILE_SYSTEM_VALUE) { + /* Nothing to record for system values. */ } else if (file == TGSI_FILE_SAMPLER) { /* Nothing to record for samplers. 
*/ } else { @@ -324,6 +326,38 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, assert(!indirect); break; + case TGSI_FILE_SYSTEM_VALUE: { + nir_intrinsic_instr *load; + nir_intrinsic_op op; + unsigned ncomp = 1; + + switch (c->scan->system_value_semantic_name[index]) { + case TGSI_SEMANTIC_VERTEXID_NOBASE: + op = nir_intrinsic_load_vertex_id_zero_base; + break; + case TGSI_SEMANTIC_VERTEXID: + op = nir_intrinsic_load_vertex_id; + break; + case TGSI_SEMANTIC_BASEVERTEX: + op = nir_intrinsic_load_base_vertex; + break; + case TGSI_SEMANTIC_INSTANCEID: + op = nir_intrinsic_load_instance_id; + break; + default: + unreachable("bad system value"); + } + + load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = ncomp; + + nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL); + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); + + src = nir_src_for_ssa(&load->dest.ssa); + break; + } + case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: { nir_intrinsic_instr *load; From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/cp: support to swap mad src's Message-ID: <20150411165909.611D376028@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f0e9a632a12798bd727799e396cde665bd960665 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0e9a632a12798bd727799e396cde665bd960665 Author: Rob Clark Date: Mon Apr 6 10:48:11 2015 -0400 freedreno/ir3/cp: support to swap mad src's For a normal MAD (ie. not MADSH), if first source is gpr and second source is const, we can swap the first two sources to avoid needing a mov instruction. This gives back the biggest advantage TGSI f/e had over NIR f/e for common shaders, since TGSI f/e had this logic in the f/e. 
Note that doing this in copy-prop step has the advantage that it will also work for cases like: MOV TEMP[b], CONST[x] MAD TEMP[d], TEMP[a], TEMP[b], TEMP[c] Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 13 ++++++++-- src/gallium/drivers/freedreno/ir3/ir3.h | 4 +-- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 32 +++++++++++++++++++++--- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 ++- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 4d75d77..98637c7 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -676,9 +676,7 @@ static inline bool is_mad(opc_t opc) { switch (opc) { case OPC_MAD_U16: - case OPC_MADSH_U16: case OPC_MAD_S16: - case OPC_MADSH_M16: case OPC_MAD_U24: case OPC_MAD_S24: case OPC_MAD_F16: @@ -689,4 +687,15 @@ static inline bool is_mad(opc_t opc) } } +static inline bool is_madsh(opc_t opc) +{ + switch (opc) { + case OPC_MADSH_U16: + case OPC_MADSH_M16: + return true; + default: + return false; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f424f73..1a8bead 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -540,10 +540,10 @@ static inline bool reg_gpr(struct ir3_register *r) return true; } -/* some cat2 instructions (ie. those which are not float can embed an +/* some cat2 instructions (ie. 
those which are not float) can embed an * immediate: */ -static inline bool ir3_cat2_immed(opc_t opc) +static inline bool ir3_cat2_int(opc_t opc) { switch (opc) { case OPC_ADD_U: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 3eb85f6..77bfbc5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -115,7 +115,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, case 2: valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST; - if (ir3_cat2_immed(instr->opc)) + if (ir3_cat2_int(instr->opc)) valid_flags |= IR3_REG_IMMED; if (flags & ~valid_flags) @@ -199,6 +199,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags); +/* the "plain" MAD's (ie. the ones that don't shift first src prior to + * multiply) can swap their first two srcs if src[0] is !CONST and + * src[1] is CONST: + */ +static bool is_valid_mad(struct ir3_instruction *instr) +{ + return (instr->category == 3) && is_mad(instr->opc); +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -255,8 +264,23 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) combine_flags(&new_flags, reg->flags); - if (!valid_flags(instr, n, new_flags)) - return; + if (!valid_flags(instr, n, new_flags)) { + /* special case for "normal" mad instructions, we can + * try swapping the first two args if that fits better. 
+ */ + if ((n == 1) && is_valid_mad(instr) && + !(instr->regs[0 + 1]->flags & IR3_REG_CONST) && + valid_flags(instr, 0, new_flags)) { + /* swap src[0] and src[1]: */ + struct ir3_register *tmp; + tmp = instr->regs[0 + 1]; + instr->regs[0 + 1] = instr->regs[1 + 1]; + instr->regs[1 + 1] = tmp; + n = 0; + } else { + return; + } + } /* Here we handle the special case of mov from * CONST and/or RELATIV. These need to be handled @@ -305,7 +329,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) debug_assert((instr->category == 6) || ((instr->category == 2) && - ir3_cat2_immed(instr->opc))); + ir3_cat2_int(instr->opc))); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 0cda62b..9e1f45d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -74,7 +74,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { return 6; } else if ((consumer->category == 3) && - is_mad(consumer->opc) && (n == 2)) { + (is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; } else { From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: set first_driver_param Message-ID: <20150411165909.6BA7C761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 09cbd97a47a81f5d4b014adb5bdb9a490b24db82 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09cbd97a47a81f5d4b014adb5bdb9a490b24db82 Author: Rob Clark Date: Mon Apr 6 10:54:30 2015 -0400 freedreno/ir3/nir: set first_driver_param Without this, a3xx breaks.. a4xx would too if it had already implemented support for passing driver params. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index fdf814f..aa5b5f7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -244,6 +244,8 @@ compile_init(struct ir3_shader_variant *so, free((void *)lowered_tokens); so->first_immediate = ctx->s->num_uniforms; + /* for now, now driver params: */ + so->first_driver_param = so->first_immediate; return ctx; } From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/asm: change assert to warning Message-ID: <20150411165909.78A69761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 972ce757d7f521f9f867594fe5661b8074130821 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=972ce757d7f521f9f867594fe5661b8074130821 Author: Rob Clark Date: Mon Apr 6 11:42:57 2015 -0400 freedreno/ir3/asm: change assert to warning It probably *should* be an assert, but for now TGSI f/e isn't very good about dealing w/ CONST vs ABS/NEG. So for debug builds, print a warning instead of crashing with an assert for now. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 284c655..e015de9 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -97,7 +97,10 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, { reg_t val = { .dummy32 = 0 }; - assert(!(reg->flags & ~valid_flags)); + if (reg->flags & ~valid_flags) { + debug_printf("INVALID FLAGS: %x vs %x\n", + reg->flags, valid_flags); + } if (!(reg->flags & IR3_REG_R)) repeat = 0; From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: add variable-indexing support Message-ID: <20150411165909.87166761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4cf4006674bd7c507688316e2033d77066c45c90 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4cf4006674bd7c507688316e2033d77066c45c90 Author: Rob Clark Date: Tue Apr 7 20:41:48 2015 -0400 freedreno/ir3/nir: add variable-indexing support A bit fugly; we should try to make this cleaner. Note that if we hoist all the get_addr() calls out of the loop, we can drop the hashtable and just use create_addr().
Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 220 ++++++++++++++++++-- 1 file changed, 204 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index aa5b5f7..9af5c16 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -79,6 +79,10 @@ struct ir3_compile { /* mapping from nir_register to defining instruction: */ struct hash_table *def_ht; + /* mapping from nir_variable to ir3_array: */ + struct hash_table *var_ht; + unsigned num_arrays; + /* a common pattern for indirect addressing is to request the * same address register multiple times. To avoid generating * duplicate instruction sequences (which our backend does not @@ -232,6 +236,8 @@ compile_init(struct ir3_shader_variant *so, ctx->next_inloc = 8; ctx->def_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx->var_ht = _mesa_hash_table_create(ctx, + _mesa_hash_pointer, _mesa_key_pointer_equal); ctx->addr_ht = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -259,6 +265,7 @@ compile_error(struct ir3_compile *ctx, const char *format, ...) 
va_end(ap); nir_print_shader(ctx->s, stdout); ctx->error = true; + debug_assert(0); } #define compile_assert(ctx, cond) do { \ @@ -271,6 +278,30 @@ compile_free(struct ir3_compile *ctx) ralloc_free(ctx); } + +struct ir3_array { + unsigned length, aid; + struct ir3_instruction *arr[]; +}; + +static void +declare_var(struct ir3_compile *ctx, nir_variable *var) +{ + unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */ + struct ir3_array *arr = ralloc_size(ctx, sizeof(*arr) + + (length * sizeof(arr->arr[0]))); + arr->length = length; + arr->aid = ++ctx->num_arrays; + _mesa_hash_table_insert(ctx->var_ht, var, arr); +} + +static struct ir3_array * +get_var(struct ir3_compile *ctx, nir_variable *var) +{ + struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var); + return entry->data; +} + /* allocate a n element value array (to be populated by caller) and * insert in def_ht */ @@ -371,11 +402,11 @@ get_addr(struct ir3_compile *ctx, struct ir3_instruction *src) } static struct ir3_instruction * -create_uniform(struct ir3_block *block, unsigned n) +create_uniform(struct ir3_compile *ctx, unsigned n) { struct ir3_instruction *mov; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(ctx->block, 1, 0); /* TODO get types right? 
*/ mov->cat1.src_type = TYPE_F32; mov->cat1.dst_type = TYPE_F32; @@ -386,33 +417,45 @@ create_uniform(struct ir3_block *block, unsigned n) } static struct ir3_instruction * -create_uniform_indirect(struct ir3_block *block, unsigned n, +create_uniform_indirect(struct ir3_compile *ctx, unsigned n, struct ir3_instruction *address) { struct ir3_instruction *mov; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(ctx->block, 1, 0); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV); mov->address = address; + array_insert(ctx->ir->indirects, mov); + return mov; } static struct ir3_instruction * -create_indirect(struct ir3_block *block, struct ir3_instruction **arr, - unsigned arrsz, unsigned n, struct ir3_instruction *address) +create_collect(struct ir3_block *block, struct ir3_instruction **arr, + unsigned arrsz) { - struct ir3_instruction *mov, *collect; - struct ir3_register *src; + struct ir3_instruction *collect; collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); ir3_reg_create(collect, 0, 0); for (unsigned i = 0; i < arrsz; i++) ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i]; + return collect; +} + +static struct ir3_instruction * +create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n, + struct ir3_instruction *address, struct ir3_instruction *collect) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *src; + mov = ir3_instr_create(block, 1, 0); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; @@ -420,7 +463,34 @@ create_indirect(struct ir3_block *block, struct ir3_instruction **arr, src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV); src->instr = collect; src->size = arrsz; + src->offset = n; + mov->address = address; + + array_insert(ctx->ir->indirects, mov); + + return mov; +} + +static struct ir3_instruction * +create_indirect_store(struct 
ir3_compile *ctx, unsigned arrsz, unsigned n, + struct ir3_instruction *src, struct ir3_instruction *address, + struct ir3_instruction *collect) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *mov; + struct ir3_register *dst; + + mov = ir3_instr_create(block, 1, 0); + mov->cat1.src_type = TYPE_U32; + mov->cat1.dst_type = TYPE_U32; + dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV); + dst->size = arrsz; + dst->offset = n; + ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src; mov->address = address; + mov->fanin = collect; + + array_insert(ctx->ir->indirects, mov); return mov; } @@ -604,6 +674,8 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) compile_assert(ctx, !asrc->negate); src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[0]]; + if (!src[i]) + src[i] = create_immed(ctx->block, 0); dst[i] = ir3_MOV(b, src[i], TYPE_U32); } @@ -637,6 +709,9 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) case nir_op_imov: dst[0] = ir3_MOV(b, src[0], TYPE_S32); break; + case nir_op_fmov: + dst[0] = ir3_MOV(b, src[0], TYPE_F32); + break; case nir_op_f2b: dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0); dst[0]->cat2.condition = IR3_COND_NE; @@ -851,6 +926,110 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) } } +/* handles array reads: */ +static void +emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + nir_deref_var *dvar = intr->variables[0]; + nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); + struct ir3_array *arr = get_var(ctx, dvar->var); + + compile_assert(ctx, dvar->deref.child && + (dvar->deref.child->deref_type == nir_deref_type_array)); + + switch (darr->deref_array_type) { + case nir_deref_array_type_direct: + /* direct access does not require anything special: */ + for (int i = 0; i < intr->num_components; i++) { + unsigned n = darr->base_offset * 4 + i; + compile_assert(ctx, n < arr->length); + dst[i] = arr->arr[n]; + } + break; + case 
nir_deref_array_type_indirect: { + /* for indirect, we need to collect all the array elements: */ + struct ir3_instruction *collect = + create_collect(ctx->block, arr->arr, arr->length); + struct ir3_instruction *addr = + get_addr(ctx, get_src(ctx, &darr->indirect)[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = darr->base_offset * 4 + i; + compile_assert(ctx, n < arr->length); + dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect); + } + break; + } + default: + compile_error(ctx, "Unhandled load deref type: %u\n", + darr->deref_array_type); + break; + } +} + +/* handles array writes: */ +static void +emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr) +{ + nir_deref_var *dvar = intr->variables[0]; + nir_deref_array *darr = nir_deref_as_array(dvar->deref.child); + struct ir3_array *arr = get_var(ctx, dvar->var); + struct ir3_instruction **src; + + compile_assert(ctx, dvar->deref.child && + (dvar->deref.child->deref_type == nir_deref_type_array)); + + src = get_src(ctx, &intr->src[0]); + + switch (darr->deref_array_type) { + case nir_deref_array_type_direct: + /* direct access does not require anything special: */ + for (int i = 0; i < intr->num_components; i++) { + unsigned n = darr->base_offset * 4 + i; + compile_assert(ctx, n < arr->length); + arr->arr[n] = src[i]; + } + break; + case nir_deref_array_type_indirect: { + /* for indirect, create indirect-store and fan that out: */ + struct ir3_instruction *collect = + create_collect(ctx->block, arr->arr, arr->length); + struct ir3_instruction *addr = + get_addr(ctx, get_src(ctx, &darr->indirect)[0]); + for (int i = 0; i < intr->num_components; i++) { + struct ir3_instruction *store; + unsigned n = darr->base_offset * 4 + i; + compile_assert(ctx, n < arr->length); + + store = create_indirect_store(ctx, arr->length, + n, src[i], addr, collect); + + store->fanin->fi.aid = arr->aid; + + /* TODO: probably split this out to be used for + * 
store_output_indirect? or move this into + * create_indirect_store()? + */ + for (int j = i; j < arr->length; j += 4) { + struct ir3_instruction *split; + + split = ir3_instr_create(ctx->block, -1, OPC_META_FO); + split->fo.off = j; + ir3_reg_create(split, 0, 0); + ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store; + + arr->arr[j] = split; + } + } + break; + } + default: + compile_error(ctx, "Unhandled store deref type: %u\n", + darr->deref_array_type); + break; + } +} + static void emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) { @@ -868,7 +1047,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_uniform(b, n); + dst[i] = create_uniform(ctx, n); } break; case nir_intrinsic_load_uniform_indirect: @@ -876,7 +1055,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_uniform_indirect(b, n, + dst[i] = create_uniform_indirect(ctx, n, get_addr(ctx, src[0])); } break; @@ -890,12 +1069,20 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_input_indirect: compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + create_collect(b, b->inputs, b->ninputs); + struct ir3_instruction *addr = get_addr(ctx, src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; - dst[i] = create_indirect(b, b->inputs, b->ninputs, n, - get_addr(ctx, src[i])); + dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect); } break; + case nir_intrinsic_load_var: + emit_intrinisic_load_var(ctx, intr, dst); + break; + case nir_intrinsic_store_var: + emit_intrinisic_store_var(ctx, intr); + break; case nir_intrinsic_store_output: 
compile_assert(ctx, intr->const_index[1] == 1); src = get_src(ctx, &intr->src[0]); @@ -1419,15 +1606,16 @@ emit_instructions(struct ir3_compile *ctx) /* Setup inputs: */ foreach_list_typed(nir_variable, var, node, &ctx->s->inputs) { setup_input(ctx, var); - if (ctx->error) - return; } /* Setup outputs: */ foreach_list_typed(nir_variable, var, node, &ctx->s->outputs) { setup_output(ctx, var); - if (ctx->error) - return; + } + + /* Setup variables (which should only be arrays): */ + foreach_list_typed(nir_variable, var, node, &ctx->s->globals) { + declare_var(ctx, var); } /* Find the main function and emit the body: */ From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: simplify emit_tex() Message-ID: <20150411165909.A0B53761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6e8160d6e3ea7b000de112538dcbb0e29a6c3838 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6e8160d6e3ea7b000de112538dcbb0e29a6c3838 Author: Rob Clark Date: Thu Apr 9 12:55:49 2015 -0400 freedreno/ir3/nir: simplify emit_tex() Just build up arrays for src0/src1, and use create_collect().. Also add back missing .3d flag for 3d/cube textures. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 28 ++++++ .../drivers/freedreno/ir3/ir3_compiler_nir.c | 99 ++++++++------------ 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 1a8bead..85daf10 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -908,6 +908,34 @@ INSTR1(4, SQRT) INSTR1(5, DSX) INSTR1(5, DSY) +static inline struct ir3_instruction * +ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, + unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, + struct ir3_instruction *src0, struct ir3_instruction *src1) +{ + struct ir3_instruction *sam; + struct ir3_register *reg; + + sam = ir3_instr_create(block, 5, opc); + sam->flags |= flags; + ir3_reg_create(sam, 0, 0)->wrmask = wrmask; + if (src0) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->wrmask = (1 << (src0->regs_count - 1)) - 1; + reg->instr = src0; + } + if (src1) { + reg = ir3_reg_create(sam, 0, IR3_REG_SSA); + reg->instr = src1; + reg->wrmask = (1 << (src1->regs_count - 1)) - 1; + } + sam->cat5.samp = samp; + sam->cat5.tex = tex; + sam->cat5.type = type; + + return sam; +} + /* cat6 instructions: */ INSTR2(6, LDLV) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 9af5c16..0139c20 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -440,6 +440,9 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr, { struct ir3_instruction *collect; + if (arrsz == 0) + return NULL; + collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); ir3_reg_create(collect, 0, 0); for (unsigned i = 0; i < arrsz; i++) @@ -1153,11 +1156,12 @@ static void emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) { struct ir3_block *b = ctx->block; - struct 
ir3_instruction **dst, *src0, *src1, *sam; + struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy; - struct ir3_register *reg; bool has_bias = false, has_lod = false, has_proj = false, has_off = false; unsigned i, coords, flags = 0; + unsigned nsrc0 = 0, nsrc1 = 0; + type_t type; opc_t opc; /* TODO: might just be one component for gathers? */ @@ -1211,61 +1215,51 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) * bias/lod go into the second arg */ - src0 = ir3_instr_create2(b, -1, OPC_META_FI, 12); - ir3_reg_create(src0, 0, 0); - coords = tex->coord_components; if (tex->is_array) /* array idx goes after shadow ref */ coords--; /* insert tex coords: */ for (i = 0; i < coords; i++) - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = coord[i]; + src0[nsrc0++] = coord[i]; if (coords == 1) { /* hw doesn't do 1d, so we treat it as 2d with * height of 1, and patch up the y coord. * TODO: y coord should be (int)0 in some cases.. 
*/ - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = - create_immed(b, fui(0.5)); + src0[nsrc0++] = create_immed(b, fui(0.5)); + } else if (coords == 3) { + flags |= IR3_INSTR_3D; } if (tex->is_shadow) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = compare; + src0[nsrc0++] = compare; flags |= IR3_INSTR_S; } if (tex->is_array) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = coord[coords]; + src0[nsrc0++] = coord[coords]; flags |= IR3_INSTR_A; } if (has_proj) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = proj; + src0[nsrc0++] = proj; flags |= IR3_INSTR_P; } /* pad to 4, then ddx/ddy: */ if (tex->op == nir_texop_txd) { - while (src0->regs_count < 5) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = - create_immed(b, fui(0.0)); - } - for (i = 0; i < coords; i++) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = ddx[i]; - } - if (coords < 2) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = - create_immed(b, fui(0.0)); - } - for (i = 0; i < coords; i++) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = ddy[i]; - } - if (coords < 2) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = - create_immed(b, fui(0.0)); - } + while (nsrc0 < 4) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddx[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); + for (i = 0; i < coords; i++) + src0[nsrc0++] = ddy[i]; + if (coords < 2) + src0[nsrc0++] = create_immed(b, fui(0.0)); } /* @@ -1275,25 +1269,16 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) * - bias */ if (has_off | has_lod | has_bias) { - src1 = ir3_instr_create2(b, -1, OPC_META_FI, 5); - ir3_reg_create(src1, 0, 0); - if (has_off) { - for (i = 0; i < coords; i++) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = off[i]; - } - if (coords < 2) { - ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = - create_immed(b, fui(0.0)); - } + for (i = 0; i < coords; i++) + src1[nsrc1++] = off[i]; + if (coords < 2) + src1[nsrc1++] = create_immed(b, fui(0.0)); flags |= 
IR3_INSTR_O; } - if (has_lod | has_bias) { - ir3_reg_create(src1, 0, IR3_REG_SSA)->instr = lod; - } - } else { - src1 = NULL; + if (has_lod | has_bias) + src1[nsrc1++] = lod; } switch (tex->op) { @@ -1311,33 +1296,25 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) return; } - sam = ir3_instr_create(b, 5, opc); - sam->flags |= flags; - ir3_reg_create(sam, 0, 0)->wrmask = 0xf; // TODO proper wrmask?? - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->wrmask = (1 << (src0->regs_count - 1)) - 1; - reg->instr = src0; - if (src1) { - reg = ir3_reg_create(sam, 0, IR3_REG_SSA); - reg->instr = src1; - reg->wrmask = (1 << (src1->regs_count - 1)) - 1; - } - sam->cat5.samp = tex->sampler_index; - sam->cat5.tex = tex->sampler_index; - switch (tex->dest_type) { case nir_type_invalid: case nir_type_float: - sam->cat5.type = TYPE_F32; + type = TYPE_F32; break; case nir_type_int: - sam->cat5.type = TYPE_S32; + type = TYPE_S32; break; case nir_type_unsigned: case nir_type_bool: - sam->cat5.type = TYPE_U32; + type = TYPE_U32; + break; } + sam = ir3_SAM(b, opc, type, 0xf, flags, + tex->sampler_index, tex->sampler_index, + create_collect(b, src0, nsrc0), + create_collect(b, src1, nsrc1)); + // TODO maybe split this out into a helper, for other cases that // write multiple? 
struct ir3_instruction *prev = NULL; From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: couple little fixes Message-ID: <20150411165909.CB472761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b98c0262d1183d24a37272558c51678cd6a0e9ec URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b98c0262d1183d24a37272558c51678cd6a0e9ec Author: Rob Clark Date: Sat Apr 11 10:04:50 2015 -0400 freedreno/ir3/nir: couple little fixes Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 34598db..f7ea879 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -303,6 +303,11 @@ declare_var(struct ir3_compile *ctx, nir_variable *var) (length * sizeof(arr->arr[0]))); arr->length = length; arr->aid = ++ctx->num_arrays; + /* Some shaders end up reading array elements without first writing.. 
+ * so initialize things to prevent null instr ptrs later: + */ + for (unsigned i = 0; i < length; i++) + arr->arr[i] = create_immed(ctx->block, 0); _mesa_hash_table_insert(ctx->var_ht, var, arr); } @@ -733,6 +738,8 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) compile_assert(ctx, !asrc->negate); src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[chan]]; + + compile_assert(ctx, src[i]); } switch (alu->op) { @@ -1613,8 +1620,9 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) /* with NIR, we need to infer TGSI_INTERPOLATE_COLOR * from the semantic name: */ - if ((semantic_name == TGSI_SEMANTIC_COLOR) || - (semantic_name == TGSI_SEMANTIC_BCOLOR)) + if ((in->data.interpolation == INTERP_QUALIFIER_NONE) && + ((semantic_name == TGSI_SEMANTIC_COLOR) || + (semantic_name == TGSI_SEMANTIC_BCOLOR))) so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR; if (ctx->flat_bypass) { From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/sched: avoid getting stuck on addr conflicts Message-ID: <20150411165909.8F765761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 49be76166b0b3c93bd2287fabc31d76d143d314c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=49be76166b0b3c93bd2287fabc31d76d143d314c Author: Rob Clark Date: Wed Apr 8 11:04:37 2015 -0400 freedreno/ir3/sched: avoid getting stuck on addr conflicts When we get in a scenario where we cannot schedule any more instructions due to address register conflict, clone the instruction that writes the address register, and switch the remaining unscheduled users for the current address register over to the new clone. This is simpler and more robust than the previous attempt (which tried and sometimes failed to ensure all other dependencies of users of the address register were scheduled first).. 
hint it would try to schedule instructions that were not actually needed for any output value. We probably need to do the same with predicate register, although so far it isn't so heavily used so we aren't running into problems with it (yet). Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_sched.c | 74 ++++++++++++++----------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 653f679..a790cba 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -267,36 +267,6 @@ static int trysched(struct ir3_sched_ctx *ctx, } } - /* if instruction writes address register, we need to ensure - * that the instructions which use the address register value - * have all their other dependencies scheduled. - * TODO we may possibly need to do the same thing with predicate - * register usage, but for now we get by without since the - * predicate usage patterns are more simple - */ - if (writes_addr(instr)) { - struct ir3 *ir = instr->block->shader; - unsigned i; - - for (i = 0; i < ir->indirects_count; i++) { - struct ir3_instruction *indirect = ir->indirects[i]; - if (indirect->depth == DEPTH_UNUSED) - continue; - if (indirect->address != instr) - continue; - /* NOTE: avoid recursively scheduling the dependency - * on ourself (ie. 
avoid infinite recursion): - */ - foreach_ssa_src(src, indirect) { - if ((src == instr) || (src->address == instr)) - continue; - delay = trysched(ctx, src); - if (delay) - return delay; - } - } - } - /* if this is a write to address/predicate register, and that * register is currently in use, we need to defer until it is * free: @@ -390,8 +360,48 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx, /* detect if we've gotten ourselves into an impossible situation * and bail if needed */ - if (all_delayed && (attempted > 0)) - ctx->error = true; + if (all_delayed && (attempted > 0)) { + if (pred_in_use) { + /* TODO we probably need to keep a list of instructions + * that reference predicate, similar to indirects + */ + ctx->error = true; + return DELAYED; + } + if (addr_in_use) { + struct ir3 *ir = ctx->addr->block->shader; + struct ir3_instruction *new_addr = + ir3_instr_clone(ctx->addr); + unsigned i; + + /* original addr is scheduled, but new one isn't: */ + new_addr->flags &= ~IR3_INSTR_MARK; + + for (i = 0; i < ir->indirects_count; i++) { + struct ir3_instruction *indirect = ir->indirects[i]; + + /* skip instructions already scheduled: */ + if (indirect->flags & IR3_INSTR_MARK) + continue; + + /* remap remaining instructions using current addr + * to new addr: + */ + if (indirect->address == ctx->addr) + indirect->address = new_addr; + } + + /* all remaining indirects remapped to new addr: */ + ctx->addr = NULL; + + /* not really, but this will trigger us to go back to + * main trysched() loop now that we've resolved the + * conflict by duplicating the instr that writes to + * the address register. 
+ */ + return SCHEDULED; + } + } return cnt; } From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/cp: handle indirect properly Message-ID: <20150411165909.97535761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d5357c16cc0ccd84c3475778fcc08a025b8c24f7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5357c16cc0ccd84c3475778fcc08a025b8c24f7 Author: Rob Clark Date: Wed Apr 8 14:10:00 2015 -0400 freedreno/ir3/cp: handle indirect properly I noticed some cases where we were trying to copy-propagate indirect src's into places they cannot go, like 2nd src for cat3 (mad, etc). Expand out valid_flags() to be aware of relativ flag, and fix up a few related spots. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 33 +++++++++++++++++----------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 77bfbc5..313a423 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -89,7 +89,7 @@ static unsigned cp_flags(unsigned flags) flags &= (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | - IR3_REG_BNOT); + IR3_REG_BNOT | IR3_REG_RELATIV); return flags; } @@ -102,6 +102,10 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, /* clear flags that are 'ok' */ switch (instr->category) { case 1: + valid_flags = IR3_REG_IMMED | IR3_REG_RELATIV; + if (flags & ~valid_flags) + return false; + break; case 5: /* no flags allowed */ if (flags) @@ -113,7 +117,8 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, return false; break; case 2: - valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST; + valid_flags = ir3_cat2_absneg(instr->opc) | + IR3_REG_CONST |
IR3_REG_RELATIV; if (ir3_cat2_int(instr->opc)) valid_flags |= IR3_REG_IMMED; @@ -140,19 +145,19 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, } break; case 3: - valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST; + valid_flags = ir3_cat3_absneg(instr->opc) | + IR3_REG_CONST | IR3_REG_RELATIV; if (flags & ~valid_flags) return false; - if (flags & IR3_REG_CONST) { - /* cannot deal w/ const in 2nd src: */ - /* TODO in some common cases, like mad, we can swap - * first two args.. possibly we should allow that here - * and fixup in legalize? - */ + if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) { + /* cannot deal w/ const/relativ in 2nd src: */ if (n == 1) return false; + } + + if (flags & IR3_REG_CONST) { /* cannot be const + ABS|NEG: */ if (flags & (IR3_REG_FABS | IR3_REG_FNEG | IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) @@ -240,16 +245,17 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (!valid_flags(instr, n, reg->flags)) { /* insert an absneg.f */ - if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) { + if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) { debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS))); reg->instr = ir3_ABSNEG_S(instr->block, reg->instr, cp_flags(src_flags)); } else { - debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS))); + debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))); reg->instr = ir3_ABSNEG_F(instr->block, reg->instr, cp_flags(src_flags)); } reg->flags &= ~cp_flags(src_flags); + debug_assert(valid_flags(instr, n, reg->flags)); /* send it through instr_cp() again since * the absneg src might be a mov from const * that could be cleaned up: @@ -269,7 +275,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) * try swapping the first two args if that fits better. 
*/ if ((n == 1) && is_valid_mad(instr) && - !(instr->regs[0 + 1]->flags & IR3_REG_CONST) && + !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && valid_flags(instr, 0, new_flags)) { /* swap src[0] and src[1]: */ struct ir3_register *tmp; @@ -327,7 +333,8 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (src_reg->flags & IR3_REG_IMMED) { int32_t iim_val = src_reg->iim_val; - debug_assert((instr->category == 6) || + debug_assert((instr->category == 1) || + (instr->category == 6) || ((instr->category == 2) && ir3_cat2_int(instr->opc))); From robclark at kemper.freedesktop.org Sat Apr 11 16:59:09 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Sat, 11 Apr 2015 09:59:09 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: split out tex helpers Message-ID: <20150411165909.A9348761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 97e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=97e8fc3fdd8a5e7e9e5635cfde81c3cb297e9f1a Author: Rob Clark Date: Thu Apr 9 16:44:38 2015 -0400 freedreno/ir3/nir: split out tex helpers We'll need these in one or two other spots. 
Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 106 +++++++++++++------- 1 file changed, 72 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0139c20..d044c1a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -606,6 +606,34 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp) } } +/* helper for instructions that produce multiple consecutive scalar + * outputs which need to have a split/fanout meta instruction inserted + */ +static void +split_dest(struct ir3_block *block, struct ir3_instruction **dst, + struct ir3_instruction *src) +{ + struct ir3_instruction *prev = NULL; + for (int i = 0, j = 0; i < 4; i++) { + struct ir3_instruction *split = + ir3_instr_create(block, -1, OPC_META_FO); + ir3_reg_create(split, 0, IR3_REG_SSA); + ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; + split->fo.off = i; + + if (prev) { + split->cp.left = prev; + split->cp.left_cnt++; + prev->cp.right = split; + prev->cp.right_cnt++; + } + prev = split; + + if (src->regs[0]->wrmask & (1 << i)) + dst[j++] = split; + } +} + /* * Adreno uses uint rather than having dedicated bool type, * which (potentially) requires some conversion, in particular @@ -1153,13 +1181,50 @@ emit_undef(struct ir3_compile *ctx, nir_ssa_undef_instr *undef) */ static void +tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp) +{ + unsigned coords, flags = 0; + + /* note: would use tex->coord_components.. except txs.. 
also, + * since array index goes after shadow ref, we don't want to + * count it: + */ + switch (tex->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + coords = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + case GLSL_SAMPLER_DIM_MS: + coords = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + coords = 3; + flags |= IR3_INSTR_3D; + break; + } + + if (tex->is_shadow) + flags |= IR3_INSTR_S; + + if (tex->is_array) + flags |= IR3_INSTR_A; + + *flagsp = flags; + *coordsp = coords; +} + +static void emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) { struct ir3_block *b = ctx->block; struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy; bool has_bias = false, has_lod = false, has_proj = false, has_off = false; - unsigned i, coords, flags = 0; + unsigned i, coords, flags; unsigned nsrc0 = 0, nsrc1 = 0; type_t type; opc_t opc; @@ -1215,9 +1280,7 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) * bias/lod go into the second arg */ - coords = tex->coord_components; - if (tex->is_array) /* array idx goes after shadow ref */ - coords--; + tex_info(tex, &flags, &coords); /* insert tex coords: */ for (i = 0; i < coords; i++) @@ -1229,19 +1292,13 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) * TODO: y coord should be (int)0 in some cases.. 
*/ src0[nsrc0++] = create_immed(b, fui(0.5)); - } else if (coords == 3) { - flags |= IR3_INSTR_3D; } - if (tex->is_shadow) { + if (tex->is_shadow) src0[nsrc0++] = compare; - flags |= IR3_INSTR_S; - } - if (tex->is_array) { + if (tex->is_array) src0[nsrc0++] = coord[coords]; - flags |= IR3_INSTR_A; - } if (has_proj) { src0[nsrc0++] = proj; @@ -1310,31 +1367,12 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) break; } - sam = ir3_SAM(b, opc, type, 0xf, flags, - tex->sampler_index, tex->sampler_index, + sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW, + flags, tex->sampler_index, tex->sampler_index, create_collect(b, src0, nsrc0), create_collect(b, src1, nsrc1)); - // TODO maybe split this out into a helper, for other cases that - // write multiple? - struct ir3_instruction *prev = NULL; - for (int i = 0; i < 4; i++) { - struct ir3_instruction *split = - ir3_instr_create(b, -1, OPC_META_FO); - ir3_reg_create(split, 0, IR3_REG_SSA); - ir3_reg_create(split, 0, IR3_REG_SSA)->instr = sam; - split->fo.off = i; - - if (prev) { - split->cp.left = prev; - split->cp.left_cnt++; - prev->cp.right = split; - prev->cp.right_cnt++; - } - prev = split; - - dst[i] = split; - } + split_dest(b, dst, sam); } From mattst88 at kemper.freedesktop.org Sat Apr 11 16:59:57 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Sat, 11 Apr 2015 09:59:57 -0700 (PDT) Subject: Mesa (master): i965: Remove useless reg_offset >= 0 tests. Message-ID: <20150411165957.0F764761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1ac230975e2d3d9429e7a54f05d4fd803419fcd3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ac230975e2d3d9429e7a54f05d4fd803419fcd3 Author: Matt Turner Date: Sat Apr 11 09:47:39 2015 -0700 i965: Remove useless reg_offset >= 0 tests. Commit eb9bd3a1 changed the type of this field to uint16_t. 
--- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 1 - src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 72c490b..2dfafdf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -34,7 +34,6 @@ static void assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { if (reg->file == GRF) { - assert(reg->reg_offset >= 0); reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset; reg->reg_offset = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 3186824..3f2bb05 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -35,7 +35,6 @@ static void assign(unsigned int *reg_hw_locations, backend_reg *reg) { if (reg->file == GRF) { - assert(reg->reg_offset >= 0); reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset; reg->reg_offset = 0; } From mattst88 at kemper.freedesktop.org Sat Apr 11 16:59:57 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Sat, 11 Apr 2015 09:59:57 -0700 (PDT) Subject: Mesa (master): i965: Remove useless null check. Message-ID: <20150411165957.25CDD761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ea0c35faf88962e049c0a67ce714e03933383be1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea0c35faf88962e049c0a67ce714e03933383be1 Author: Matt Turner Date: Sat Apr 11 09:54:38 2015 -0700 i965: Remove useless null check. If it were null, we'd have just dereferenced it two lines above.
--- src/mesa/drivers/dri/i965/brw_context.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d4a7d3d..a4884ed 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -925,10 +925,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) (struct brw_context *) driContextPriv->driverPrivate; struct gl_context *ctx = &brw->ctx; - assert(brw); /* should never be null */ - if (!brw) - return; - /* Dump a final BMP in case the application doesn't call SwapBuffers */ if (INTEL_DEBUG & DEBUG_AUB) { intel_batchbuffer_flush(brw); From mattst88 at kemper.freedesktop.org Sat Apr 11 16:59:57 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Sat, 11 Apr 2015 09:59:57 -0700 (PDT) Subject: Mesa (master): i965/fs/nir: Mark fallthrough. Message-ID: <20150411165957.1B21E761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 024ecc783b763712d2896fd315d8b5222c27b1ec URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=024ecc783b763712d2896fd315d8b5222c27b1ec Author: Matt Turner Date: Sat Apr 11 09:49:36 2015 -0700 i965/fs/nir: Mark fallthrough. 
--- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7c56290..c179d71 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1515,6 +1515,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_uniform_indirect: has_indirect = true; + /* fallthrough */ case nir_intrinsic_load_uniform: { unsigned index = instr->const_index[0]; @@ -1742,6 +1743,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_store_output_indirect: has_indirect = true; + /* fallthrough */ case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); unsigned index = 0; From mattst88 at kemper.freedesktop.org Sat Apr 11 17:23:11 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Sat, 11 Apr 2015 10:23:11 -0700 (PDT) Subject: Mesa (master): glsl: Mark path as unreachable. Message-ID: <20150411172311.4959C761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8e414cbdec76ce33a16425631ac87cb4ba827409 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8e414cbdec76ce33a16425631ac87cb4ba827409 Author: Matt Turner Date: Sat Apr 11 10:11:13 2015 -0700 glsl: Mark path as unreachable. --- src/glsl/ast_function.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 918be69..36a0d19 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -1791,7 +1791,7 @@ ast_function_expression::hir(exec_list *instructions, return value; } - return ir_rvalue::error_value(ctx); + unreachable("not reached"); } ir_rvalue * From kwg at kemper.freedesktop.org Sat Apr 11 19:39:54 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 11 Apr 2015 12:39:54 -0700 (PDT) Subject: Mesa (master): i965: Move lower_output_reads to brw_link_shader(). 
Message-ID: <20150411193954.D84B4761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f41f07f685e7f585e433b5fd1fadf602e74f0f1e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f41f07f685e7f585e433b5fd1fadf602e74f0f1e Author: Kenneth Graunke Date: Tue Apr 7 15:05:12 2015 -0700 i965: Move lower_output_reads to brw_link_shader(). This makes it so emit_nir_code() doesn't modify the GLSL IR. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 1 - src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c179d71..6bb52eb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -90,7 +90,6 @@ fs_visitor::emit_nir_code() nir_shader *nir; /* First, lower the GLSL IR or Mesa IR to NIR */ if (shader_prog) { - lower_output_reads(shader->base.ir); nir = glsl_to_nir(&shader->base, options); } else { nir = prog_to_nir(prog, options); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index bf9aceb..8700077 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -199,6 +199,9 @@ process_glsl_ir(struct brw_context *brw, options, ctx->Const.NativeIntegers) || progress; } while (progress); + if (options->NirOptions != NULL) + lower_output_reads(shader->ir); + validate_ir_tree(shader->ir); /* Now that we've finished altering the linked IR, reparent any live IR back From kwg at kemper.freedesktop.org Sat Apr 11 19:39:54 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 11 Apr 2015 12:39:54 -0700 (PDT) Subject: Mesa (master): nir: Store num_direct_uniforms in the nir_shader. 
Message-ID: <20150411193954.E2D64761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b3e286c4575bf6af343c1a03471fd876cdfb5c43 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3e286c4575bf6af343c1a03471fd876cdfb5c43 Author: Kenneth Graunke Date: Tue Apr 7 17:13:45 2015 -0700 nir: Store num_direct_uniforms in the nir_shader. Storing this here is pretty sketchy - I don't know if any driver other than i965 will want to use it. But this will make it a lot easier to generate NIR code at link time. We'll probably rework it anyway. (Ian suggested making nir_assign_var_locations_scalar_direct_first simply modify the nir_shader's fields, rather than passing pointers to them. If this stays long term, we should do that. But Jason and I suspect we'll be reworking this area again in the near future.) Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir.h | 3 +++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 6531237..2d1d870 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1431,6 +1431,9 @@ typedef struct nir_shader { * access plus one */ unsigned num_inputs, num_uniforms, num_outputs; + + /** the number of uniforms that are only accessed directly */ + unsigned num_direct_uniforms; } nir_shader; #define nir_foreach_overload(shader, overload) \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 6bb52eb..4ee92a8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code() if (shader_prog) { nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &num_direct_uniforms, + &nir->num_direct_uniforms, &nir->num_uniforms); } else { /* ARB programs generally create a giant array of "uniform" data, and allow @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code() * analysis, it's 
all or nothing. num_direct_uniforms is only useful when * we have some direct and some indirect access; it doesn't matter here. */ - num_direct_uniforms = 0; + nir->num_direct_uniforms = 0; } nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); @@ -343,6 +343,7 @@ void fs_visitor::nir_setup_uniforms(nir_shader *shader) { uniforms = shader->num_uniforms; + num_direct_uniforms = shader->num_direct_uniforms; /* We split the uniform register file in half. The first half is * entirely direct uniforms. The second half is indirect. From kwg at kemper.freedesktop.org Sat Apr 11 19:39:54 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 11 Apr 2015 12:39:54 -0700 (PDT) Subject: Mesa (master): i965: Create NIR during LinkShader() and ProgramStringNotify(). Message-ID: <20150411193955.005BE761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 89c1feb78d010bc457f5d02be84c955eebf3549f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89c1feb78d010bc457f5d02be84c955eebf3549f Author: Kenneth Graunke Date: Tue Apr 7 15:15:09 2015 -0700 i965: Create NIR during LinkShader() and ProgramStringNotify(). Previously, we translated into NIR and did all the optimizations and lowering as part of running fs_visitor. This meant that we did all of that work twice for fragment shaders - once for SIMD8, and again for SIMD16. We also had to redo it every time we hit a state based recompile. We now generate NIR once at link time. ARB programs don't have linking, so we instead generate it at ProgramStringNotify time. Mesa's fixed function vertex program handling doesn't bother to inform the driver about new programs at all (which is rather mean), so we generate NIR at the last minute, if it hasn't happened already. shader-db runs ~9.4% faster on my i7-5600U, with a release build. v2: Check NirOptions != NULL in ProgramStringNotify(). 
Don't bother using _mesa_program_enum_to_shader_stage as we already know it. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 +---------------------- src/mesa/drivers/dri/i965/brw_nir.c | 213 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_nir.h | 6 + src/mesa/drivers/dri/i965/brw_program.c | 9 ++ src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- src/mesa/main/mtypes.h | 2 + src/mesa/program/program.c | 5 + 9 files changed, 257 insertions(+), 176 deletions(-) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 498d5a7..6d4659f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -77,6 +77,7 @@ i965_FILES = \ brw_misc_state.c \ brw_multisample_state.h \ brw_nir.h \ + brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_object_purgeable.c \ brw_packed_float.c \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4ee92a8..d6508fc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -28,175 +28,10 @@ #include "brw_fs.h" #include "brw_nir.h" -static void -nir_optimize(nir_shader *nir) -{ - bool progress; - do { - progress = false; - nir_lower_vars_to_ssa(nir); - nir_validate_shader(nir); - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - progress |= nir_opt_dce(nir); - nir_validate_shader(nir); - progress |= nir_opt_cse(nir); - nir_validate_shader(nir); - progress |= nir_opt_peephole_select(nir); - nir_validate_shader(nir); - progress |= nir_opt_algebraic(nir); - nir_validate_shader(nir); - progress |= 
nir_opt_constant_folding(nir); - nir_validate_shader(nir); - progress |= nir_opt_remove_phis(nir); - nir_validate_shader(nir); - } while (progress); -} - -static bool -count_nir_instrs_in_block(nir_block *block, void *state) -{ - int *count = (int *) state; - nir_foreach_instr(block, instr) { - *count = *count + 1; - } - return true; -} - -static int -count_nir_instrs(nir_shader *nir) -{ - int count = 0; - nir_foreach_overload(nir, overload) { - if (!overload->impl) - continue; - nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); - } - return count; -} - void fs_visitor::emit_nir_code() { - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - - nir_shader *nir; - /* First, lower the GLSL IR or Mesa IR to NIR */ - if (shader_prog) { - nir = glsl_to_nir(&shader->base, options); - } else { - nir = prog_to_nir(prog, options); - nir_convert_to_ssa(nir); /* turn registers into SSA */ - } - nir_validate_shader(nir); - - nir_lower_global_vars_to_local(nir); - nir_validate_shader(nir); - - nir_lower_tex_projector(nir); - nir_validate_shader(nir); - - nir_normalize_cubemap_coords(nir); - nir_validate_shader(nir); - - nir_split_var_copies(nir); - nir_validate_shader(nir); - - nir_optimize(nir); - - /* Lower a bunch of stuff */ - nir_lower_var_copies(nir); - nir_validate_shader(nir); - - /* Get rid of split copies */ - nir_optimize(nir); - - if (shader_prog) { - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms); - } else { - /* ARB programs generally create a giant array of "uniform" data, and allow - * indirect addressing without any boundaries. In the absence of bounds - * analysis, it's all or nothing. num_direct_uniforms is only useful when - * we have some direct and some indirect access; it doesn't matter here. 
- */ - nir->num_direct_uniforms = 0; - } - nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); - nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); - - nir_lower_io(nir); - nir_validate_shader(nir); - - nir_remove_dead_variables(nir); - nir_validate_shader(nir); - - if (shader_prog) { - nir_lower_samplers(nir, shader_prog, stage); - nir_validate_shader(nir); - } - - nir_lower_system_values(nir); - nir_validate_shader(nir); - - nir_lower_atomics(nir); - nir_validate_shader(nir); - - nir_optimize(nir); - - if (brw->gen >= 6) { - /* Try and fuse multiply-adds */ - nir_opt_peephole_ffma(nir); - nir_validate_shader(nir); - } - - nir_opt_algebraic_late(nir); - nir_validate_shader(nir); - - nir_lower_locals_to_regs(nir); - nir_validate_shader(nir); - - nir_lower_to_source_mods(nir); - nir_validate_shader(nir); - nir_copy_prop(nir); - nir_validate_shader(nir); - nir_opt_dce(nir); - nir_validate_shader(nir); - - if (unlikely(debug_enabled)) { - fprintf(stderr, "NIR (SSA form) for %s shader:\n", stage_name); - nir_print_shader(nir, stderr); - } - - if (dispatch_width == 8) { - static GLuint msg_id = 0; - _mesa_gl_debug(&brw->ctx, &msg_id, - MESA_DEBUG_SOURCE_SHADER_COMPILER, - MESA_DEBUG_TYPE_OTHER, - MESA_DEBUG_SEVERITY_NOTIFICATION, - "%s NIR shader: %d inst\n", - stage_abbrev, - count_nir_instrs(nir)); - } - - nir_convert_from_ssa(nir); - nir_validate_shader(nir); - - /* This is the last pass we run before we start emitting stuff. It - * determines when we need to insert boolean resolves on Gen <= 5. We - * run it last because it stashes data in instr->pass_flags and we don't - * want that to be squashed by other NIR passes. 
- */ - if (brw->gen <= 5) - brw_nir_analyze_boolean_resolves(nir); + nir_shader *nir = prog->nir; /* emit the arrays used for inputs and outputs - load/store intrinsics will * be converted to reads/writes of these arrays @@ -232,13 +67,6 @@ fs_visitor::emit_nir_code() assert(overload->impl); nir_emit_impl(overload->impl); } - - if (unlikely(debug_enabled)) { - fprintf(stderr, "NIR (final form) for %s shader:\n", stage_name); - nir_print_shader(nir, stderr); - } - - ralloc_free(nir); } void diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c new file mode 100644 index 0000000..de4d7aa --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -0,0 +1,213 @@ +/* + * Copyright ? 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "brw_nir.h" +#include "glsl/glsl_parser_extras.h" +#include "glsl/nir/glsl_to_nir.h" +#include "program/prog_to_nir.h" + +static void +nir_optimize(nir_shader *nir) +{ + bool progress; + do { + progress = false; + nir_lower_vars_to_ssa(nir); + nir_validate_shader(nir); + nir_lower_alu_to_scalar(nir); + nir_validate_shader(nir); + progress |= nir_copy_prop(nir); + nir_validate_shader(nir); + nir_lower_phis_to_scalar(nir); + nir_validate_shader(nir); + progress |= nir_copy_prop(nir); + nir_validate_shader(nir); + progress |= nir_opt_dce(nir); + nir_validate_shader(nir); + progress |= nir_opt_cse(nir); + nir_validate_shader(nir); + progress |= nir_opt_peephole_select(nir); + nir_validate_shader(nir); + progress |= nir_opt_algebraic(nir); + nir_validate_shader(nir); + progress |= nir_opt_constant_folding(nir); + nir_validate_shader(nir); + progress |= nir_opt_remove_phis(nir); + nir_validate_shader(nir); + } while (progress); +} + +static bool +count_nir_instrs_in_block(nir_block *block, void *state) +{ + int *count = (int *) state; + nir_foreach_instr(block, instr) { + *count = *count + 1; + } + return true; +} + +static int +count_nir_instrs(nir_shader *nir) +{ + int count = 0; + nir_foreach_overload(nir, overload) { + if (!overload->impl) + continue; + nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); + } + return count; +} + +nir_shader * +brw_create_nir(struct brw_context *brw, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + gl_shader_stage stage) +{ + struct gl_context *ctx = &brw->ctx; + const nir_shader_compiler_options *options = + ctx->Const.ShaderCompilerOptions[stage].NirOptions; + struct gl_shader *shader = shader_prog ? 
shader_prog->_LinkedShaders[stage] : NULL; + bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); + nir_shader *nir; + + /* First, lower the GLSL IR or Mesa IR to NIR */ + if (shader_prog) { + nir = glsl_to_nir(shader, options); + } else { + nir = prog_to_nir(prog, options); + nir_convert_to_ssa(nir); /* turn registers into SSA */ + } + nir_validate_shader(nir); + + nir_lower_global_vars_to_local(nir); + nir_validate_shader(nir); + + nir_lower_tex_projector(nir); + nir_validate_shader(nir); + + nir_normalize_cubemap_coords(nir); + nir_validate_shader(nir); + + nir_split_var_copies(nir); + nir_validate_shader(nir); + + nir_optimize(nir); + + /* Lower a bunch of stuff */ + nir_lower_var_copies(nir); + nir_validate_shader(nir); + + /* Get rid of split copies */ + nir_optimize(nir); + + if (shader_prog) { + nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, + &nir->num_direct_uniforms, + &nir->num_uniforms); + } else { + /* ARB programs generally create a giant array of "uniform" data, and allow + * indirect addressing without any boundaries. In the absence of bounds + * analysis, it's all or nothing. num_direct_uniforms is only useful when + * we have some direct and some indirect access; it doesn't matter here. 
+ */ + nir->num_direct_uniforms = 0; + } + nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); + nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); + + nir_lower_io(nir); + nir_validate_shader(nir); + + nir_remove_dead_variables(nir); + nir_validate_shader(nir); + + if (shader_prog) { + nir_lower_samplers(nir, shader_prog, stage); + nir_validate_shader(nir); + } + + nir_lower_system_values(nir); + nir_validate_shader(nir); + + nir_lower_atomics(nir); + nir_validate_shader(nir); + + nir_optimize(nir); + + if (brw->gen >= 6) { + /* Try and fuse multiply-adds */ + nir_opt_peephole_ffma(nir); + nir_validate_shader(nir); + } + + nir_opt_algebraic_late(nir); + nir_validate_shader(nir); + + nir_lower_locals_to_regs(nir); + nir_validate_shader(nir); + + nir_lower_to_source_mods(nir); + nir_validate_shader(nir); + nir_copy_prop(nir); + nir_validate_shader(nir); + nir_opt_dce(nir); + nir_validate_shader(nir); + + if (unlikely(debug_enabled)) { + fprintf(stderr, "NIR (SSA form) for %s shader:\n", + _mesa_shader_stage_to_string(stage)); + nir_print_shader(nir, stderr); + } + + static GLuint msg_id = 0; + _mesa_gl_debug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, + "%s NIR shader: %d inst\n", + _mesa_shader_stage_to_abbrev(stage), + count_nir_instrs(nir)); + + nir_convert_from_ssa(nir); + nir_validate_shader(nir); + + /* This is the last pass we run before we start emitting stuff. It + * determines when we need to insert boolean resolves on Gen <= 5. We + * run it last because it stashes data in instr->pass_flags and we don't + * want that to be squashed by other NIR passes. 
+ */ + if (brw->gen <= 5) + brw_nir_analyze_boolean_resolves(nir); + + nir_sweep(nir); + + if (unlikely(debug_enabled)) { + fprintf(stderr, "NIR (final form) for %s shader:\n", + _mesa_shader_stage_to_string(stage)); + nir_print_shader(nir, stderr); + } + + return nir; +} diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 27782a3..3131109 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -23,6 +23,7 @@ #pragma once +#include "brw_context.h" #include "glsl/nir/nir.h" #ifdef __cplusplus @@ -73,6 +74,11 @@ enum { void brw_nir_analyze_boolean_resolves(nir_shader *nir); +nir_shader *brw_create_nir(struct brw_context *brw, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + gl_shader_stage stage); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 8920c34..9e27c2a 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -43,6 +43,7 @@ #include "brw_context.h" #include "brw_shader.h" +#include "brw_nir.h" #include "brw_wm.h" #include "intel_batchbuffer.h" @@ -141,6 +142,10 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); + if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT); + } + brw_fs_precompile(ctx, NULL, prog); break; } @@ -163,6 +168,10 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); + if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX); + } + brw_vs_precompile(ctx, NULL, prog); break; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 8700077..335a800 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -27,6 +27,7 @@ #include "brw_gs.h" #include "brw_fs.h" #include "brw_cfg.h" +#include "brw_nir.h" #include "glsl/ir_optimization.h" #include "glsl/glsl_parser_extras.h" #include "main/shaderapi.h" @@ -229,6 +230,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { struct gl_shader *shader = shProg->_LinkedShaders[stage]; + const struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[stage]; if (!shader) continue; @@ -277,6 +280,9 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); + if (options->NirOptions) + prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage); + _mesa_reference_program(ctx, &prog, NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ef2fd40..c4c77b2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -25,6 +25,7 @@ #include "brw_fs.h" #include "brw_cfg.h" #include "brw_vs.h" +#include "brw_nir.h" #include "brw_vec4_live_variables.h" #include "brw_dead_control_flow.h" @@ -1809,6 +1810,8 @@ brw_vs_emit(struct brw_context *brw, bool start_busy = false; double start_time = 0; const unsigned *assembly = NULL; + bool use_nir = + brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL; if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && @@ -1823,9 +1826,17 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS)) brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base); - if (brw->scalar_vs && - (prog || - brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions)) { + if (use_nir && !c->vp->program.Base.nir) { + /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but + * Mesa's fixed-function vertex program handling 
doesn't notify the driver + * at all. Just do it here, at the last minute, even though it's lame. + */ + assert(c->vp->program.Base.Id == 0 && prog == NULL); + c->vp->program.Base.nir = + brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX); + } + + if (brw->scalar_vs && (prog || use_nir)) { fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8); if (!v.run_vs()) { if (prog) { diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 6184028..5d726b4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2087,6 +2087,8 @@ struct gl_program struct prog_instruction *Instructions; + struct nir_shader *nir; + GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 3c214d5..4f28e2a 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -37,6 +37,7 @@ #include "prog_cache.h" #include "prog_parameter.h" #include "prog_instruction.h" +#include "util/ralloc.h" /** @@ -380,6 +381,10 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog) _mesa_free_parameter_list(prog->Parameters); } + if (prog->nir) { + ralloc_free(prog->nir); + } + free(prog); } From kwg at kemper.freedesktop.org Sat Apr 11 19:39:55 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 11 Apr 2015 12:39:55 -0700 (PDT) Subject: Mesa (master): i965/nir: Make INTEL_DEBUG=ann work with NIR. Message-ID: <20150411193955.0AE8F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b6354d9bb077815d2e388dc5d0e7411ea6d89748 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6354d9bb077815d2e388dc5d0e7411ea6d89748 Author: Kenneth Graunke Date: Sat Jan 24 02:05:56 2015 -0800 i965/nir: Make INTEL_DEBUG=ann work with NIR. 
Now that we store a copy of the NIR shader, and don't immediately free it, we can use it in annotations as well. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 ++++ src/mesa/drivers/dri/i965/intel_asm_annotation.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index d6508fc..3972581 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -438,6 +438,8 @@ fs_visitor::nir_emit_block(nir_block *block) void fs_visitor::nir_emit_instr(nir_instr *instr) { + this->base_ir = instr; + switch (instr->type) { case nir_instr_type_alu: nir_emit_alu(nir_instr_as_alu(instr)); @@ -464,6 +466,8 @@ fs_visitor::nir_emit_instr(nir_instr *instr) default: unreachable("unknown instruction type"); } + + this->base_ir = NULL; } static brw_reg_type diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index ac12655..eed5756 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -29,6 +29,7 @@ #include "program/prog_print.h" #include "program/prog_instruction.h" #include "main/macros.h" +#include "glsl/nir/nir.h" void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, @@ -55,7 +56,9 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation last_annotation_ir = annotation[i].ir; if (last_annotation_ir) { fprintf(stderr, " "); - if (!prog->Instructions) + if (prog->nir) + nir_print_instr(annotation[i].ir, stderr); + else if (!prog->Instructions) fprint_ir(stderr, annotation[i].ir); else { const struct prog_instruction *pi = From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 
-0700 (PDT) Subject: Mesa (10.5): Update version to 10.5.3 Message-ID: <20150412221356.A3890761EA@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: b17312cac2618d28decf32912d3e96364b9db42f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b17312cac2618d28decf32912d3e96364b9db42f Author: Emil Velikov Date: Sun Apr 12 22:21:07 2015 +0100 Update version to 10.5.3 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index a39233b..1e9c35f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.5.2 +10.5.3 From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (10.5): dist: add the VG depedencies into the tarball Message-ID: <20150412221356.9D53F761E9@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 37d925a63538bde60aae4d185c4017c0b8d210b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=37d925a63538bde60aae4d185c4017c0b8d210b6 Author: Emil Velikov Date: Wed Apr 8 18:23:13 2015 +0100 dist: add the VG depedencies into the tarball Otherwise the scons build will fail. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89905 Signed-off-by: Emil Velikov --- Makefile.am | 1 + src/mapi/Makefile.am | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index f4f0912..82db4be 100644 --- a/Makefile.am +++ b/Makefile.am @@ -49,6 +49,7 @@ noinst_HEADERS = \ include/c99 \ include/c11 \ include/D3D9 \ + include/VG \ include/HaikuGL \ include/pci_ids diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am index 572f340..d18c464 100644 --- a/src/mapi/Makefile.am +++ b/src/mapi/Makefile.am @@ -231,7 +231,7 @@ es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps) $(call glapi_gen_mapi,$<,es2api) # XXX: Inline vgapi's Makefile.am here. 
-EXTRA_DIST += vgapi +EXTRA_DIST += vgapi mapi.c mapi.h # if HAVE_OPENVG # SUBDIRS += vgapi # endif From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (10.5): Add release notes for the 10.5.3 release Message-ID: <20150412221356.ACF82761EB@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: c4b8bff6e24c6661a8a05ec05f8ed5762e95021b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c4b8bff6e24c6661a8a05ec05f8ed5762e95021b Author: Emil Velikov Date: Sun Apr 12 22:29:06 2015 +0100 Add release notes for the 10.5.3 release Signed-off-by: Emil Velikov --- docs/relnotes/10.5.3.html | 124 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/docs/relnotes/10.5.3.html b/docs/relnotes/10.5.3.html new file mode 100644 index 0000000..8fe8390 --- /dev/null +++ b/docs/relnotes/10.5.3.html @@ -0,0 +1,124 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.3 Release Notes / April 12, 2015

    + +

    +Mesa 10.5.3 is a bug fix release which fixes bugs found since the 10.5.2 release. +

    +

    +Mesa 10.5.3 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 83962 - [HSW/BYT]Piglit spec_ARB_gpu_shader5_arb_gpu_shader5-emitstreamvertex_nodraw fails
    • + +
    • Bug 89679 - [NV50] Portal/Half-Life 2 will not start (native Steam)
    • + +
    • Bug 89746 - Mesa and LLVM 3.6+ break opengl for genymotion
    • + +
    • Bug 89754 - vertexAttrib fails WebGL Conformance test with mesa drivers
    • + +
    • Bug 89758 - pow WebGL Conformance test with mesa drivers
    • + +
    • Bug 89759 - WebGL OGL ES GLSL conformance test with mesa drivers fails
    • + +
    • Bug 89905 - scons build broken on 10.5.2 due to activated vega st
    • + +
    + +

    Changes

    + +

    Dave Airlie (1):

    +
      +
    • st_glsl_to_tgsi: only do mov copy propagation on temps (v2)
    • +
    + +

    Emil Velikov (5):

    +
      +
    • docs: Add sha256 sums for the 10.5.2 release
    • +
    • xmlpool: don't forget to ship the MOS
    • +
    • configure.ac: error out if python/mako is not found when required
    • +
    • dist: add the VG depedencies into the tarball
    • +
    • Update version to 10.5.3
    • +
    + +

    Iago Toral Quiroga (1):

    +
      +
    • i965: Do not render primitives in non-zero streams then TF is disabled
    • +
    + +

    Ilia Mirkin (7):

    +
      +
    • st/mesa: update arrays when the current attrib has been updated
    • +
    • nv50/ir: take postFactor into account when doing peephole optimizations
    • +
    • nv50/ir/gk110: fix offset flag position for TXD opcode
    • +
    • freedreno/a3xx: fix 3d texture layout
    • +
    • freedreno/a3xx: point size should not be divided by 2
    • +
    • nv50: allocate more offset space for occlusion queries
    • +
    • nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling
    • +
    + +

    Kenneth Graunke (2):

    +
      +
    • i965: Fix instanced geometry shaders on Gen8+.
    • +
    • i965: Add forgotten multi-stream code to Gen8 SOL state.
    • +
    + +

    Marcin Ślusarz (1):

    +
      +
    • nouveau: synchronize "scratch runout" destruction with the command stream
    • +
    + +

    Michel Dänzer (1):

    +
      +
    • radeonsi: Cache LLVMTargetMachineRef in context instead of in screen
    • +
    + +

    Tom Stellard (1):

    +
      +
    • clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when compilation fails v2
    • +
    + +

    Ville Syrjälä (1):

    +
      +
    • i965: Fix URB size for CHV
    • +
    + + +
    + + From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (10.5): docs: Add 256 sums for the 10.5.3 release Message-ID: <20150412221356.BA219761E9@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 65776421fe59a0ffd5388c0968c9b0b1c1b230ed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=65776421fe59a0ffd5388c0968c9b0b1c1b230ed Author: Emil Velikov Date: Sun Apr 12 23:10:42 2015 +0100 docs: Add 256 sums for the 10.5.3 release Signed-off-by: Emil Velikov --- docs/relnotes/10.5.3.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.3.html b/docs/relnotes/10.5.3.html index 8fe8390..e5e0d30 100644 --- a/docs/relnotes/10.5.3.html +++ b/docs/relnotes/10.5.3.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +2371b8e210ccd19f61dd94b6664d612e5a479ba7d431a074512d87633bd6aeb4  mesa-10.5.3.tar.gz
    +8701ee1be4f5c03238f5e63c1a9bd4cc03a2f6c0155ed42a1ae7d58f18912ba2  mesa-10.5.3.tar.xz
     
    From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (master): docs: Add 256 sums for the 10.5.3 release Message-ID: <20150412221356.EF0F0761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a94f8e712f97fec95e42643b0ad409814b7984f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a94f8e712f97fec95e42643b0ad409814b7984f1 Author: Emil Velikov Date: Sun Apr 12 23:10:42 2015 +0100 docs: Add 256 sums for the 10.5.3 release Signed-off-by: Emil Velikov (cherry picked from commit 65776421fe59a0ffd5388c0968c9b0b1c1b230ed) --- docs/relnotes/10.5.3.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.3.html b/docs/relnotes/10.5.3.html index 8fe8390..e5e0d30 100644 --- a/docs/relnotes/10.5.3.html +++ b/docs/relnotes/10.5.3.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +2371b8e210ccd19f61dd94b6664d612e5a479ba7d431a074512d87633bd6aeb4  mesa-10.5.3.tar.gz
    +8701ee1be4f5c03238f5e63c1a9bd4cc03a2f6c0155ed42a1ae7d58f18912ba2  mesa-10.5.3.tar.xz
     
    From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:57 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:57 -0700 (PDT) Subject: Mesa: tag mesa-10.5.3: Mesa 10.5.3 release Message-ID: <20150412221357.18CDD761E9@kemper.freedesktop.org> Module: Mesa Branch: refs/tags/mesa-10.5.3 Tag: 13ebd5bf902a6f68f50acfa9c49e316d5ecfb50f URL: http://cgit.freedesktop.org/mesa/mesa/tag/?id=13ebd5bf902a6f68f50acfa9c49e316d5ecfb50f Tagger: Emil Velikov Date: Sun Apr 12 22:29:45 2015 +0100 Mesa 10.5.3 release From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (master): docs: remove the --with-max-{width,height} note Message-ID: <20150412221356.DBC22761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 61c6cc4a4aa23724bd51089b38741feed67617bc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61c6cc4a4aa23724bd51089b38741feed67617bc Author: Emil Velikov Date: Thu Apr 2 08:37:12 2015 +0000 docs: remove the --with-max-{width,height} note Missed out with commit d99135b2e9b(configure: nuke --with-max-{width,height}) Signed-off-by: Emil Velikov Reviewed-by: Brian Paul --- docs/faq.html | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/docs/faq.html b/docs/faq.html index d4037e8..b7c6fbf 100644 --- a/docs/faq.html +++ b/docs/faq.html @@ -327,19 +327,6 @@ Basically, applying a translation of (0.375, 0.375, 0.0) to your coordinates will fix the problem.

    -

    3.6 How can I change the maximum framebuffer size in Mesa's -swrast backend?

    -

    -These can be overridden by using the --with-max-width and ---with-max-height options. The two need not be equal. -

    -Do note that Mesa uses these values to size some internal buffers, -so increasing these sizes will cause Mesa to require additional -memory. Furthermore, increasing these limits beyond 4096 -may introduce rasterization artifacts; see the leading comments in -src/mesa/swrast/s_tritemp.h. -

    -

    From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (master): Add release notes for the 10.5.3 release Message-ID: <20150412221356.E602C761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 794b9bf26af561f2a19a33df88bef1b96b1dd56b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=794b9bf26af561f2a19a33df88bef1b96b1dd56b Author: Emil Velikov Date: Sun Apr 12 22:29:06 2015 +0100 Add release notes for the 10.5.3 release Signed-off-by: Emil Velikov (cherry picked from commit c4b8bff6e24c6661a8a05ec05f8ed5762e95021b) --- docs/relnotes/10.5.3.html | 124 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/docs/relnotes/10.5.3.html b/docs/relnotes/10.5.3.html new file mode 100644 index 0000000..8fe8390 --- /dev/null +++ b/docs/relnotes/10.5.3.html @@ -0,0 +1,124 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.3 Release Notes / April 12, 2015

    + +

    +Mesa 10.5.3 is a bug fix release which fixes bugs found since the 10.5.2 release. +

    +

    +Mesa 10.5.3 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 83962 - [HSW/BYT]Piglit spec_ARB_gpu_shader5_arb_gpu_shader5-emitstreamvertex_nodraw fails
    • + +
    • Bug 89679 - [NV50] Portal/Half-Life 2 will not start (native Steam)
    • + +
    • Bug 89746 - Mesa and LLVM 3.6+ break opengl for genymotion
    • + +
    • Bug 89754 - vertexAttrib fails WebGL Conformance test with mesa drivers
    • + +
    • Bug 89758 - pow WebGL Conformance test with mesa drivers
    • + +
    • Bug 89759 - WebGL OGL ES GLSL conformance test with mesa drivers fails
    • + +
    • Bug 89905 - scons build broken on 10.5.2 due to activated vega st
    • + +
    + +

    Changes

    + +

    Dave Airlie (1):

    +
      +
    • st_glsl_to_tgsi: only do mov copy propagation on temps (v2)
    • +
    + +

    Emil Velikov (5):

    +
      +
    • docs: Add sha256 sums for the 10.5.2 release
    • +
    • xmlpool: don't forget to ship the MOS
    • +
    • configure.ac: error out if python/mako is not found when required
    • +
    • dist: add the VG depedencies into the tarball
    • +
    • Update version to 10.5.3
    • +
    + +

    Iago Toral Quiroga (1):

    +
      +
    • i965: Do not render primitives in non-zero streams then TF is disabled
    • +
    + +

    Ilia Mirkin (7):

    +
      +
    • st/mesa: update arrays when the current attrib has been updated
    • +
    • nv50/ir: take postFactor into account when doing peephole optimizations
    • +
    • nv50/ir/gk110: fix offset flag position for TXD opcode
    • +
    • freedreno/a3xx: fix 3d texture layout
    • +
    • freedreno/a3xx: point size should not be divided by 2
    • +
    • nv50: allocate more offset space for occlusion queries
    • +
    • nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling
    • +
    + +

    Kenneth Graunke (2):

    +
      +
    • i965: Fix instanced geometry shaders on Gen8+.
    • +
    • i965: Add forgotten multi-stream code to Gen8 SOL state.
    • +
    + +

    Marcin Ślusarz (1):

    +
      +
    • nouveau: synchronize "scratch runout" destruction with the command stream
    • +
    + +

    Michel Dänzer (1):

    +
      +
    • radeonsi: Cache LLVMTargetMachineRef in context instead of in screen
    • +
    + +

    Tom Stellard (1):

    +
      +
    • clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when compilation fails v2
    • +
    + +

    Ville Syrjälä (1):

    +
      +
    • i965: Fix URB size for CHV
    • +
    + + +
    + + From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:57 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:57 -0700 (PDT) Subject: Mesa (master): docs: add news item and link release notes for mesa 10.5.3 Message-ID: <20150412221357.0514E761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5ddeab8a069984f9ffc4e716528eeb43b3a8f77b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ddeab8a069984f9ffc4e716528eeb43b3a8f77b Author: Emil Velikov Date: Sun Apr 12 23:16:42 2015 +0100 docs: add news item and link release notes for mesa 10.5.3 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index f6be764..9e4644c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

    News

    +

    April 12, 2015

    +

    +Mesa 10.5.3 is released. +This is a bug-fix release. +

    +

    March 28, 2015

    Mesa 10.5.2 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 2bfd9ce..6ec35d1 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

      +
    • 10.5.3 release notes
    • 10.5.2 release notes
    • 10.4.7 release notes
    • 10.5.1 release notes From evelikov at kemper.freedesktop.org Sun Apr 12 22:13:56 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Sun, 12 Apr 2015 15:13:56 -0700 (PDT) Subject: Mesa (master): configure.ac: remove deprecated --with-libclc-path Message-ID: <20150412221356.D201B761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0e742b1cb39c0df5846068d36c4f32c126b83fac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0e742b1cb39c0df5846068d36c4f32c126b83fac Author: Emil Velikov Date: Wed Apr 1 17:46:09 2015 +0100 configure.ac: remove deprecated --with-libclc-path The option was deprecated with commit 959e83d6507(clover: Adapt libclc's INCLUDEDIR and LIBEXECDIR to make use of the new introduced libclc.pc.) back in 2012 with mesa 9.2. Signed-off-by: Emil Velikov Reviewed-by: Tom Stellard --- configure.ac | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/configure.ac b/configure.ac index 4ed4b74..a40cc20 100644 --- a/configure.ac +++ b/configure.ac @@ -1656,19 +1656,6 @@ dnl dnl OpenCL configuration dnl -AC_ARG_WITH([libclc-path], - [AS_HELP_STRING([--with-libclc-path], - [DEPRECATED: See http://dri.freedesktop.org/wiki/GalliumCompute#How_to_Install])], - [LIBCLC_PATH="$withval"], - [LIBCLC_PATH='']) - -if test -n "$LIBCLC_PATH"; then - AC_MSG_ERROR([The --with-libclc-path option has been deprecated. - Please review the updated build instructions for clover: - http://dri.freedesktop.org/wiki/GalliumCompute]) -fi - - AC_ARG_WITH([clang-libdir], [AS_HELP_STRING([--with-clang-libdir], [Path to Clang libraries @<:@default=llvm-config --libdir@:>@])], From jrfonseca at kemper.freedesktop.org Mon Apr 13 12:09:39 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 13 Apr 2015 05:09:39 -0700 (PDT) Subject: Mesa (master): util/ralloc: Fix `extern "C"` usage. 
Message-ID: <20150413120939.82F1076028@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 978753e84368ef3afa9288cbfbee1c85b3ab09d1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=978753e84368ef3afa9288cbfbee1c85b3ab09d1 Author: Jose Fonseca Date: Thu Apr 2 11:24:26 2015 +0100 util/ralloc: Fix `extern "C"` usage. Reviewed-by: Roland Scheidegger Reviewed-by: Kenneth Graunke --- src/util/ralloc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/ralloc.h b/src/util/ralloc.h index 01f102b..7587e11 100644 --- a/src/util/ralloc.h +++ b/src/util/ralloc.h @@ -46,16 +46,16 @@ #ifndef RALLOC_H #define RALLOC_H -#ifdef __cplusplus -extern "C" { -#endif - #include #include #include #include "macros.h" +#ifdef __cplusplus +extern "C" { +#endif + /** * \def ralloc(ctx, type) * Allocate a new object chained off of the given context. From jrfonseca at kemper.freedesktop.org Mon Apr 13 12:09:39 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 13 Apr 2015 05:09:39 -0700 (PDT) Subject: Mesa (master): mesa: Remove pointless USE_EXTERNAL_DXTN_LIB macro. Message-ID: <20150413120939.7B2D8761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 85dd46d90cd7d3d6898d28626063563c1aaba369 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=85dd46d90cd7d3d6898d28626063563c1aaba369 Author: Jose Fonseca Date: Thu Apr 2 10:09:38 2015 +0100 mesa: Remove pointless USE_EXTERNAL_DXTN_LIB macro. I'm not sure what was the original intention, but currently USE_EXTERNAL_DXTN_LIB always ends up defined, one way or another. 
Reviewed-by: Roland Scheidegger --- configure.ac | 2 +- src/mesa/main/texcompress_s3tc.c | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index a40cc20..9e8c1d8 100644 --- a/configure.ac +++ b/configure.ac @@ -230,7 +230,7 @@ _SAVE_LDFLAGS="$LDFLAGS" _SAVE_CPPFLAGS="$CPPFLAGS" dnl Compiler macros -DEFINES="-DUSE_EXTERNAL_DXTN_LIB=1" +DEFINES="" AC_SUBST([DEFINES]) case "$host_os" in linux*|*-gnu*|gnu*) diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c index 38ce5f8..7ce3cb8 100644 --- a/src/mesa/main/texcompress_s3tc.c +++ b/src/mesa/main/texcompress_s3tc.c @@ -29,10 +29,6 @@ * GL_EXT_texture_compression_s3tc support. */ -#ifndef USE_EXTERNAL_DXTN_LIB -#define USE_EXTERNAL_DXTN_LIB 1 -#endif - #include "glheader.h" #include "imports.h" #include "dlopen.h" @@ -76,7 +72,6 @@ _mesa_init_texture_s3tc( struct gl_context *ctx ) { /* called during context initialization */ ctx->Mesa_DXTn = GL_FALSE; -#if USE_EXTERNAL_DXTN_LIB if (!dxtlibhandle) { dxtlibhandle = _mesa_dlopen(DXTN_LIBNAME, 0); if (!dxtlibhandle) { @@ -117,9 +112,6 @@ _mesa_init_texture_s3tc( struct gl_context *ctx ) if (dxtlibhandle) { ctx->Mesa_DXTn = GL_TRUE; } -#else - (void) ctx; -#endif } /** From jrfonseca at kemper.freedesktop.org Mon Apr 13 12:09:39 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 13 Apr 2015 05:09:39 -0700 (PDT) Subject: Mesa (master): glx: Include util/ macros.h instead of redefining PRINTFLIKE. Message-ID: <20150413120939.8EA54761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fa1b3e1501da3d24ec4205e0056d67ef9d2663ac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa1b3e1501da3d24ec4205e0056d67ef9d2663ac Author: Jose Fonseca Date: Thu Apr 2 11:25:06 2015 +0100 glx: Include util/macros.h instead of redefining PRINTFLIKE. 
Reviewed-by: Roland Scheidegger --- src/glx/dri_common.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/glx/dri_common.h b/src/glx/dri_common.h index 5cd150a..947d331 100644 --- a/src/glx/dri_common.h +++ b/src/glx/dri_common.h @@ -39,12 +39,7 @@ #include #include #include "loader.h" - -#if (__GNUC__) -#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) -#else -#define PRINTFLIKE(f, a) -#endif +#include "util/macros.h" /* for PRINTFLIKE */ typedef struct __GLXDRIconfigPrivateRec __GLXDRIconfigPrivate; From jrfonseca at kemper.freedesktop.org Mon Apr 13 12:09:39 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 13 Apr 2015 05:09:39 -0700 (PDT) Subject: Mesa (master): docs: Improve LLVM_USE_CRT_xxx instructions. Message-ID: <20150413120939.9A102761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 36ceda4eced243c1fab487b878e20944d1238d50 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=36ceda4eced243c1fab487b878e20944d1238d50 Author: Jose Fonseca Date: Mon Apr 13 13:08:13 2015 +0100 docs: Improve LLVM_USE_CRT_xxx instructions. --- docs/llvmpipe.html | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html index 72db93a..f603bd6 100644 --- a/docs/llvmpipe.html +++ b/docs/llvmpipe.html @@ -58,15 +58,37 @@ It's the fastest software rasterizer for Mesa.

      - For Windows you will need to build LLVM from source with MSVC or MINGW - (either natively or through cross compilers) and CMake, and set the LLVM - environment variable to the directory you installed it to. + For Windows you will need to build LLVM from source with MSVC or MINGW + (either natively or through cross compilers) and CMake, and set the LLVM + environment variable to the directory you installed it to. LLVM will be statically linked, so when building on MSVC it needs to be built with a matching CRT as Mesa, and you'll need to pass - -DLLVM_USE_CRT_RELEASE=MTd for debug and checked builds, - -DLLVM_USE_CRT_RELEASE=MTd for profile and release builds. + -DLLVM_USE_CRT_xxx=yyy as described below. +

      + + + + + + + + + + + + + + + + + + + + +
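      LLVM build-type | Mesa build-type: debug,checked | Mesa build-type: release,profile
      ----------------+--------------------------------+---------------------------------
      Debug           | -DLLVM_USE_CRT_DEBUG=MTd       | -DLLVM_USE_CRT_DEBUG=MT
      Release         | -DLLVM_USE_CRT_RELEASE=MTd     | -DLLVM_USE_CRT_RELEASE=MT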
      +

      You can build only the x86 target by passing -DLLVM_TARGETS_TO_BUILD=X86 to cmake.

      From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Declare intel_miptree_alloc_mcs() as static Message-ID: <20150413144131.A67B8761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1ef4bf71914c79b703fd9a75f047b24e0f16c59a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ef4bf71914c79b703fd9a75f047b24e0f16c59a Author: Chad Versace Date: Mon Apr 6 07:04:06 2015 -0700 i965: Declare intel_miptree_alloc_mcs() as static It's not used outside of intel_mipmap_tree.c, nor should it ever be. Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 ++++++- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 5 ----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index eb226d5..f766b96 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -49,6 +49,11 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE +static bool +intel_miptree_alloc_mcs(struct brw_context *brw, + struct intel_mipmap_tree *mt, + GLuint num_samples); + /** * Determine which MSAA layout should be used by the MSAA surface being * created, based on the chip generation and the surface type. 
@@ -1300,7 +1305,7 @@ intel_miptree_copy_teximage(struct brw_context *brw, intel_obj->needs_validate = true; } -bool +static bool intel_miptree_alloc_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt, GLuint num_samples) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 41b6036..e3e2127 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -637,11 +637,6 @@ intel_miptree_copy_teximage(struct brw_context *brw, struct intel_texture_image *intelImage, struct intel_mipmap_tree *dst_mt, bool invalidate); -bool -intel_miptree_alloc_mcs(struct brw_context *brw, - struct intel_mipmap_tree *mt, - GLuint num_samples); - /** * \name Miptree HiZ functions * \{ From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Refactor brw_is_hiz_depth_format() Message-ID: <20150413144131.C1614761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e1338f267fa5670fc02a450774fa89b42e990883 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1338f267fa5670fc02a450774fa89b42e990883 Author: Chad Versace Date: Mon Apr 6 06:54:30 2015 -0700 i965: Refactor brw_is_hiz_depth_format() Every caller of this function uses it to determine if the current miptree needs a hiz buffer to be allocated. Strangely, the function doesn't take a miptree argument. So, this function effectively decides if and when a miptree's hiz buffer gets allocated without inspecting the miptree itself. Luckily, the driver behaves correctly despite the brw_is_hiz_depth_format's quirk. I will soon make some changes to the miptree that will require inspecting the miptree to determine if it needs a hiz buffer. So this patch renames brw_is_hiz_depth_format -> intel_miptree_wants_hiz_buffer and gives it a miptree parameter. 
This patch shouldn't change any behavior. Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_context.h | 1 - src/mesa/drivers/dri/i965/brw_surface_formats.c | 19 ----------------- src/mesa/drivers/dri/i965/intel_fbo.c | 4 ++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 26 +++++++++++++++++++++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 5 ++++- 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 6c168a3..0bd0ed1 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1681,7 +1681,6 @@ void brw_upload_abo_surfaces(struct brw_context *brw, struct brw_stage_prog_data *prog_data); /* brw_surface_formats.c */ -bool brw_is_hiz_depth_format(struct brw_context *ctx, mesa_format format); bool brw_render_target_supported(struct brw_context *brw, struct gl_renderbuffer *rb); uint32_t brw_depth_format(struct brw_context *brw, mesa_format format); diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 7524ad9..c7fb707 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -798,22 +798,3 @@ brw_depth_format(struct brw_context *brw, mesa_format format) unreachable("Unexpected depth format."); } } - -/** Can HiZ be enabled on a depthbuffer of the given format? 
*/ -bool -brw_is_hiz_depth_format(struct brw_context *brw, mesa_format format) -{ - if (!brw->has_hiz) - return false; - - switch (format) { - case MESA_FORMAT_Z_FLOAT32: - case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: - case MESA_FORMAT_Z24_UNORM_X8_UINT: - case MESA_FORMAT_Z24_UNORM_S8_UINT: - case MESA_FORMAT_Z_UNORM16: - return true; - default: - return false; - } -} diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 2cf4771..7babd29 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -561,7 +561,7 @@ intel_renderbuffer_update_wrapper(struct brw_context *brw, intel_renderbuffer_set_draw_offset(irb); - if (mt->hiz_buf == NULL && brw_is_hiz_depth_format(brw, rb->Format)) { + if (intel_miptree_wants_hiz_buffer(brw, mt)) { intel_miptree_alloc_hiz(brw, mt); if (!mt->hiz_buf) return false; @@ -1032,7 +1032,7 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw, INTEL_MIPTREE_TILING_ANY, false); - if (brw_is_hiz_depth_format(brw, new_mt->format)) { + if (intel_miptree_wants_hiz_buffer(brw, new_mt)) { intel_miptree_alloc_hiz(brw, new_mt); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index a906460..492338b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -403,7 +403,8 @@ intel_miptree_create_layout(struct brw_context *brw, if (!for_bo && _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && (brw->must_use_separate_stencil || - (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) { + (brw->has_separate_stencil && + intel_miptree_wants_hiz_buffer(brw, mt)))) { const bool force_all_slices_at_each_lod = brw->gen == 6; mt->stencil_mt = intel_miptree_create(brw, mt->target, @@ -843,7 +844,7 @@ intel_miptree_create_for_renderbuffer(struct brw_context *brw, if (!mt) goto fail; - if (brw_is_hiz_depth_format(brw, format)) { + if 
(intel_miptree_wants_hiz_buffer(brw, mt)) { ok = intel_miptree_alloc_hiz(brw, mt); if (!ok) goto fail; @@ -1681,6 +1682,27 @@ intel_hiz_miptree_buf_create(struct brw_context *brw, return buf; } +bool +intel_miptree_wants_hiz_buffer(struct brw_context *brw, + struct intel_mipmap_tree *mt) +{ + if (!brw->has_hiz) + return false; + + if (mt->hiz_buf != NULL) + return false; + + switch (mt->format) { + case MESA_FORMAT_Z_FLOAT32: + case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: + case MESA_FORMAT_Z24_UNORM_X8_UINT: + case MESA_FORMAT_Z24_UNORM_S8_UINT: + case MESA_FORMAT_Z_UNORM16: + return true; + default: + return false; + } +} bool intel_miptree_alloc_hiz(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 3c41893..0cb64d2 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -632,12 +632,15 @@ intel_miptree_copy_teximage(struct brw_context *brw, * functions on a miptree without HiZ. In that case, each function is a no-op. */ +bool +intel_miptree_wants_hiz_buffer(struct brw_context *brw, + struct intel_mipmap_tree *mt); + /** * \brief Allocate the miptree's embedded HiZ miptree. 
* \see intel_mipmap_tree:hiz_mt * \return false if allocation failed */ - bool intel_miptree_alloc_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt); From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Add field intel_mipmap_tree::disable_aux_buffers Message-ID: <20150413144131.CCD73761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d3b042f359df5836d4a4f56664eb228fc80772c0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d3b042f359df5836d4a4f56664eb228fc80772c0 Author: Chad Versace Date: Mon Apr 6 06:46:09 2015 -0700 i965: Add field intel_mipmap_tree::disable_aux_buffers The new field disables allocation of auxiliary buffers, such as the HiZ buffer and MCS buffer. This is useful for sharing the miptree bo with an external client that doesn't understand auxiliary buffers. We need this field to safely render to a buffer that was imported with EGL_EXT_image_dma_buf_import, because EGL does not yet have extensions to manage flushing and invalidating auxiliary buffers. Nothing yet enables this field. That's left to follow-up patches. Testing: - Tested on Ivybridge Chromebook Pixel with WebGL Aquarium and YouTube. - No Piglit regressions on Broadwell with `piglit run -p gbm tests/quick.py`. 
Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 24 ++++++++++++++++++++++-- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 7 +++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 492338b..ec0bb19 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -59,7 +59,8 @@ intel_miptree_alloc_mcs(struct brw_context *brw, * created, based on the chip generation and the surface type. */ static enum intel_msaa_layout -compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) +compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target, + bool disable_aux_buffers) { /* Prior to Gen7, all MSAA surfaces used IMS layout. */ if (brw->gen < 7) @@ -85,6 +86,11 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format, GLenum target) */ if (brw->gen == 7 && _mesa_get_format_datatype(format) == GL_INT) { return INTEL_MSAA_LAYOUT_UMS; + } else if (disable_aux_buffers) { + /* We can't use the CMS layout because it uses an aux buffer, the MCS + * buffer. So fallback to UMS, which is identical to CMS without the + * MCS. 
*/ + return INTEL_MSAA_LAYOUT_UMS; } else { return INTEL_MSAA_LAYOUT_CMS; } @@ -176,6 +182,9 @@ intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, if (brw->gen < 7) return false; + if (mt->disable_aux_buffers) + return false; + /* MCS is only supported for color buffers */ switch (_mesa_get_format_base_format(mt->format)) { case GL_DEPTH_COMPONENT: @@ -276,6 +285,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_height0 = height0; mt->logical_depth0 = depth0; mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; + mt->disable_aux_buffers = false; /* hardcoded for now */ exec_list_make_empty(&mt->hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -293,7 +303,8 @@ intel_miptree_create_layout(struct brw_context *brw, if (num_samples > 1) { /* Adjust width/height/depth for MSAA */ - mt->msaa_layout = compute_msaa_layout(brw, format, mt->target); + mt->msaa_layout = compute_msaa_layout(brw, format, + mt->target, mt->disable_aux_buffers); if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) { /* From the Ivybridge PRM, Volume 1, Part 1, page 108: * "If the surface is multisampled and it is a depth or stencil @@ -440,6 +451,9 @@ intel_miptree_create_layout(struct brw_context *brw, brw_miptree_layout(brw, mt); + if (mt->disable_aux_buffers) + assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS); + return mt; } @@ -1313,6 +1327,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw, { assert(brw->gen >= 7); /* MCS only used on Gen7+ */ assert(mt->mcs_mt == NULL); + assert(!mt->disable_aux_buffers); /* Choose the correct format for the MCS buffer. All that really matters * is that we allocate the right buffer size, since we'll always be @@ -1379,6 +1394,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt) { assert(mt->mcs_mt == NULL); + assert(!mt->disable_aux_buffers); /* The format of the MCS buffer is opaque to the driver; all that matters * is that we get its size and pitch right. 
We'll pretend that the format @@ -1692,6 +1708,9 @@ intel_miptree_wants_hiz_buffer(struct brw_context *brw, if (mt->hiz_buf != NULL) return false; + if (mt->disable_aux_buffers) + return false; + switch (mt->format) { case MESA_FORMAT_Z_FLOAT32: case MESA_FORMAT_Z32_FLOAT_S8X24_UINT: @@ -1709,6 +1728,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt) { assert(mt->hiz_buf == NULL); + assert(!mt->disable_aux_buffers); if (brw->gen == 7) { mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 0cb64d2..3dd37883 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -492,6 +492,13 @@ struct intel_mipmap_tree */ uint32_t fast_clear_color_value; + /** + * Disable allocation of auxiliary buffers, such as the HiZ buffer and MCS + * buffer. This is useful for sharing the miptree bo with an external client + * that doesn't understand auxiliary buffers. + */ + bool disable_aux_buffers; + /* These are also refcounted: */ GLuint refcount; From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Change intel_miptree_create_for_bo() signature Message-ID: <20150413144131.DA49E761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bf504b61274123f09720c80569a8b4f2d3495630 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bf504b61274123f09720c80569a8b4f2d3495630 Author: Chad Versace Date: Mon Apr 6 08:11:43 2015 -0700 i965: Change intel_miptree_create_for_bo() signature Add parameter 'bool disable_aux_buffers'. This is a refactor patch. The patch changes no behavior because the new parameter is false in every call. 
Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_fbo.c | 3 ++- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 17 +++++++++++------ src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 3 ++- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 3 ++- src/mesa/drivers/dri/i965/intel_tex.c | 3 ++- src/mesa/drivers/dri/i965/intel_tex_image.c | 3 ++- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 7babd29..4c38583 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -390,7 +390,8 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image->width, image->height, 1, - image->pitch); + image->pitch, + false /*disable_aux_buffers*/); if (!irb->mt) return; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index ec0bb19..c0a3452 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -246,7 +246,8 @@ intel_miptree_create_layout(struct brw_context *brw, GLuint depth0, bool for_bo, GLuint num_samples, - bool force_all_slices_at_each_lod) + bool force_all_slices_at_each_lod, + bool disable_aux_buffers) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); if (!mt) @@ -285,7 +286,7 @@ intel_miptree_create_layout(struct brw_context *brw, mt->logical_height0 = height0; mt->logical_depth0 = depth0; mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; - mt->disable_aux_buffers = false; /* hardcoded for now */ + mt->disable_aux_buffers = disable_aux_buffers; exec_list_make_empty(&mt->hiz_map); /* The cpp is bytes per (1, blockheight)-sized block for compressed @@ -629,7 +630,8 @@ intel_miptree_create(struct brw_context *brw, first_level, last_level, width0, height0, depth0, false, num_samples, - force_all_slices_at_each_lod); + 
force_all_slices_at_each_lod, + false /*disable_aux_buffers*/); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -720,7 +722,8 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t width, uint32_t height, uint32_t depth, - int pitch) + int pitch, + bool disable_aux_buffers) { struct intel_mipmap_tree *mt; uint32_t tiling, swizzle; @@ -744,7 +747,8 @@ intel_miptree_create_for_bo(struct brw_context *brw, mt = intel_miptree_create_layout(brw, target, format, 0, 0, width, height, depth, - true, 0, false); + true, 0, false, + disable_aux_buffers); if (!mt) { free(mt); return mt; @@ -795,7 +799,8 @@ intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, width, height, 1, - pitch); + pitch, + false); if (!singlesample_mt) goto fail; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 3dd37883..0796059 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -544,7 +544,8 @@ intel_miptree_create_for_bo(struct brw_context *brw, uint32_t width, uint32_t height, uint32_t depth, - int pitch); + int pitch, + bool disable_aux_buffers); void intel_update_winsys_renderbuffer_miptree(struct brw_context *intel, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index e0904de..055ab42 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -111,7 +111,8 @@ do_blit_drawpixels(struct gl_context * ctx, irb->mt->format, src_offset, width, height, 1, - src_stride); + src_stride, + false /*disable_aux_buffers*/); if (!pbo_mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 2d3009a..3335fd1 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -340,7 +340,8 @@ intel_set_texture_storage_for_buffer_object(struct gl_context 
*ctx, image->TexFormat, buffer_offset, image->Width, image->Height, image->Depth, - row_stride); + row_stride, + false /*disable_aux_buffers*/); if (!intel_texobj->mt) return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 00bf9ce..c581010 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -169,7 +169,8 @@ intel_set_texture_image_bo(struct gl_context *ctx, ctx->Driver.FreeTextureImageBuffer(ctx, image); intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat, - 0, width, height, 1, pitch); + 0, width, height, 1, pitch, + false /*disable_aux_buffers*/); if (intel_image->mt == NULL) return; intel_image->mt->target = target; From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Lift some restrictions on dma_buf EGLImages Message-ID: <20150413144131.EF431761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a76dc15b2b37db18151b42be63b49438588a92fe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a76dc15b2b37db18151b42be63b49438588a92fe Author: Chad Versace Date: Thu Apr 9 20:29:59 2015 -0700 i965: Lift some restrictions on dma_buf EGLImages Allow glEGLImageTargetRenderbufferStorageOES and glEGLImageTargetTexture2DOES for dma_buf EGLImages if the image is a single RGBA8 unorm plane. This is safe, despite fast color clears, because i965 disables allocation of auxiliary buffers for EGLImages. Chrome OS needs this, because its compositor uses dma_buf EGLImages for its scanout buffers. Testing: - Tested on Ivybridge Chromebook Pixel with WebGL Aquarium and YouTube. - No Piglit regressions on Broadwell with `piglit run -p gbm tests/quick.py`, with my Piglit patches that update the EGL_EXT_image_dma_buf_import tests. 
Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_fbo.c | 7 ------- src/mesa/drivers/dri/i965/intel_image.h | 10 +++++----- src/mesa/drivers/dri/i965/intel_tex_image.c | 12 ++---------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index fb26038..8a398f7 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -363,13 +363,6 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, return; } - /* Buffers originating from outside are for read-only. */ - if (image->dma_buf_imported) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetRenderbufferStorage(dma buffers are read-only)"); - return; - } - /* __DRIimage is opaque to the core so it has to be checked here */ switch (image->format) { case MESA_FORMAT_R8G8B8A8_UNORM: diff --git a/src/mesa/drivers/dri/i965/intel_image.h b/src/mesa/drivers/dri/i965/intel_image.h index 0cae711..a82cf3b 100644 --- a/src/mesa/drivers/dri/i965/intel_image.h +++ b/src/mesa/drivers/dri/i965/intel_image.h @@ -89,18 +89,18 @@ struct __DRIimageRec { GLuint tile_y; bool has_depthstencil; + /** The image was created with EGL_EXT_image_dma_buf_import. */ + bool dma_buf_imported; + /** * Provided by EGL_EXT_image_dma_buf_import. - * - * The flag is set in order to restrict the use of the image later on. 
- * - * See intel_image_target_texture_2d() + * \{ */ - bool dma_buf_imported; enum __DRIYUVColorSpace yuv_color_space; enum __DRISampleRange sample_range; enum __DRIChromaSiting horizontal_siting; enum __DRIChromaSiting vertical_siting; + /* \} */ void *data; }; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index 290d313..b70f8de 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -320,17 +320,9 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, if (image == NULL) return; - /** - * Images originating via EGL_EXT_image_dma_buf_import can be used only - * with GL_OES_EGL_image_external only. + /* We support external textures only for EGLImages created with + * EGL_EXT_image_dma_buf_import. We may lift that restriction in the future. */ - if (image->dma_buf_imported && target != GL_TEXTURE_EXTERNAL_OES) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetTexture2DOES(dma buffers can be used with " - "GL_OES_EGL_image_external only"); - return; - } - if (target == GL_TEXTURE_EXTERNAL_OES && !image->dma_buf_imported) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetTexture2DOES(external target is enabled only " From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Declare intel_miptree_create_layout() as static Message-ID: <20150413144131.B2EA7761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5776d65114b553643eea74c58699910cbdb29b55 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5776d65114b553643eea74c58699910cbdb29b55 Author: Chad Versace Date: Mon Apr 6 07:13:53 2015 -0700 i965: Declare intel_miptree_create_layout() as static It's not used outside intel_mipmap_tree.c. 
Reviewed-by: Kenneth Graunke Reviewed-by: Tapani P?lli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 +- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index f766b96..a906460 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -226,7 +226,7 @@ intel_depth_format_for_depthstencil_format(mesa_format format) { * intel_miptree_create_for_bo(). If true, then do not create * \c stencil_mt. */ -struct intel_mipmap_tree * +static struct intel_mipmap_tree * intel_miptree_create_layout(struct brw_context *brw, GLenum target, mesa_format format, diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index e3e2127..3c41893 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -530,19 +530,6 @@ struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw, bool force_all_slices_at_each_lod); struct intel_mipmap_tree * -intel_miptree_create_layout(struct brw_context *brw, - GLenum target, - mesa_format format, - GLuint first_level, - GLuint last_level, - GLuint width0, - GLuint height0, - GLuint depth0, - bool for_bo, - GLuint num_samples, - bool force_all_slices_at_each_lod); - -struct intel_mipmap_tree * intel_miptree_create_for_bo(struct brw_context *brw, drm_intel_bo *bo, mesa_format format, From chadversary at kemper.freedesktop.org Mon Apr 13 14:41:31 2015 From: chadversary at kemper.freedesktop.org (Chad Versace) Date: Mon, 13 Apr 2015 07:41:31 -0700 (PDT) Subject: Mesa (master): i965: Disable aux buffers for EGLImage-backed miptrees Message-ID: <20150413144131.E4A30761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2943b15ce7ce1bc29424949124a69538253008f7 URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2943b15ce7ce1bc29424949124a69538253008f7 Author: Chad Versace Date: Mon Apr 6 08:07:27 2015 -0700 i965: Disable aux buffers for EGLImage-backed miptrees EGL does not yet have extensions to manage the flushing and invalidating of driver-internal aux buffers. So we must disable aux buffers of dma_buf-backed EGLImages in order to safely render into them. This patch is obviously needed for renderbuffers. It's also needed for textures because the user can attach the texture to a framebuffer and because the driver sometimes renders to textures for internal reasons. Testing: - Tested on Ivybridge Chromebook Pixel with WebGL Aquarium and YouTube. - No Piglit regressions on Broadwell with `piglit run -p gbm tests/quick.py`. Reviewed-by: Kenneth Graunke Reviewed-by: Tapani Pälli Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/intel_fbo.c | 8 +++++++- src/mesa/drivers/dri/i965/intel_tex_image.c | 16 ++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 4c38583..fb26038 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -383,6 +383,12 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, irb = intel_renderbuffer(rb); intel_miptree_release(&irb->mt); + + /* Disable creation of the miptree's aux buffers because the driver exposes + * no EGL API to manage them. That is, there is no API for resolving the aux + * buffer's content to the main buffer nor for invalidating the aux buffer's + * content. 
+ */ irb->mt = intel_miptree_create_for_bo(brw, image->bo, image->format, @@ -391,7 +397,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, image->height, 1, image->pitch, - false /*disable_aux_buffers*/); + true /*disable_aux_buffers*/); if (!irb->mt) return; diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index c581010..290d313 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -154,7 +154,8 @@ intel_set_texture_image_bo(struct gl_context *ctx, uint32_t offset, GLuint width, GLuint height, GLuint pitch, - GLuint tile_x, GLuint tile_y) + GLuint tile_x, GLuint tile_y, + bool disable_aux_buffers) { struct brw_context *brw = brw_context(ctx); struct intel_texture_image *intel_image = intel_texture_image(image); @@ -170,7 +171,7 @@ intel_set_texture_image_bo(struct gl_context *ctx, intel_image->mt = intel_miptree_create_for_bo(brw, bo, image->TexFormat, 0, width, height, 1, pitch, - false /*disable_aux_buffers*/); + disable_aux_buffers); if (intel_image->mt == NULL) return; intel_image->mt->target = target; @@ -254,7 +255,8 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, rb->Base.Base.Width, rb->Base.Base.Height, rb->mt->pitch, - 0, 0); + 0, 0, + false /*disable_aux_buffers*/); _mesa_unlock_texture(&brw->ctx, texObj); } @@ -344,12 +346,18 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, return; } + /* Disable creation of the texture's aux buffers because the driver exposes + * no EGL API to manage them. That is, there is no API for resolving the aux + * buffer's content to the main buffer nor for invalidating the aux buffer's + * content. 
+ */ intel_set_texture_image_bo(ctx, texImage, image->bo, target, image->internal_format, image->format, image->offset, image->width, image->height, image->pitch, - image->tile_x, image->tile_y); + image->tile_x, image->tile_y, + true /*disable_aux_buffers*/); } /** From mattst88 at kemper.freedesktop.org Mon Apr 13 16:07:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 13 Apr 2015 09:07:47 -0700 (PDT) Subject: Mesa (master): i965: Don't bother freeing NULL. Message-ID: <20150413160747.99822761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1c9db39d54508608ea9518bc82eacbd8e27c410c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1c9db39d54508608ea9518bc82eacbd8e27c410c Author: Matt Turner Date: Sat Apr 11 10:05:31 2015 -0700 i965: Don't bother freeing NULL. Commit e16c5c90 was replacing 'region' with 'mt', leaving this nonsensical code. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index c0a3452..0424a87 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -749,10 +749,8 @@ intel_miptree_create_for_bo(struct brw_context *brw, width, height, depth, true, 0, false, disable_aux_buffers); - if (!mt) { - free(mt); - return mt; - } + if (!mt) + return NULL; drm_intel_bo_reference(bo); mt->bo = bo; From mattst88 at kemper.freedesktop.org Mon Apr 13 16:07:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 13 Apr 2015 09:07:47 -0700 (PDT) Subject: Mesa (master): swrast: Mark MAX_GLUINT literal with u suffix. 
Message-ID: <20150413160747.A0518761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 89b140dfaeacb8fb0a784c8dd7da26b0d14189e8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89b140dfaeacb8fb0a784c8dd7da26b0d14189e8 Author: Matt Turner Date: Sat Apr 11 10:14:00 2015 -0700 swrast: Mark MAX_GLUINT literal with u suffix. Coverity is confused by the "float < int / 2" expression and suggests casting MAX_GLUINT to unsigned, which I believe it was supposed to have been already. Reviewed-by: Brian Paul --- src/mesa/swrast/s_tritemp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/swrast/s_tritemp.h b/src/mesa/swrast/s_tritemp.h index fb73b2d..3cd1b44 100644 --- a/src/mesa/swrast/s_tritemp.h +++ b/src/mesa/swrast/s_tritemp.h @@ -92,7 +92,7 @@ #ifndef MAX_GLUINT -#define MAX_GLUINT 0xffffffff +#define MAX_GLUINT 0xffffffffu #endif From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Skip sending down the clear colors if not clearing. Message-ID: <20150413174118.D1A42761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5100221ff705019334fcdc17da99d257224d2aff URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5100221ff705019334fcdc17da99d257224d2aff Author: Eric Anholt Date: Mon Apr 6 15:19:30 2015 -0700 vc4: Skip sending down the clear colors if not clearing. 
--- src/gallium/drivers/vc4/vc4_context.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 9b2ee5c..daa5ba5 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -129,11 +129,13 @@ vc4_setup_rcl(struct vc4_context *vc4) branch_size + color_store_size)); - cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); - cl_u32(&vc4->rcl, vc4->clear_color[0]); - cl_u32(&vc4->rcl, vc4->clear_color[1]); - cl_u32(&vc4->rcl, vc4->clear_depth); - cl_u8(&vc4->rcl, vc4->clear_stencil); + if (vc4->cleared) { + cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS); + cl_u32(&vc4->rcl, vc4->clear_color[0]); + cl_u32(&vc4->rcl, vc4->clear_color[1]); + cl_u32(&vc4->rcl, vc4->clear_depth); + cl_u8(&vc4->rcl, vc4->clear_stencil); + } /* The rendering mode config determines the pointer that's used for * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Remove dead fields from vc4_surface. Message-ID: <20150413174118.DA6B9761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 39b6f7e76c909505df8590b6414e8f710121108a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=39b6f7e76c909505df8590b6414e8f710121108a Author: Eric Anholt Date: Thu Apr 9 13:13:23 2015 -0700 vc4: Remove dead fields from vc4_surface. 
--- src/gallium/drivers/vc4/vc4_resource.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index b2359f0..2ed848b 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -46,9 +46,6 @@ struct vc4_surface { struct pipe_surface base; uint32_t offset; uint32_t stride; - uint32_t width; - uint16_t height; - uint16_t depth; uint8_t tiling; }; From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Fix another space allocation mistake. Message-ID: <20150413174118.BE1D9761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cb88d2cfcb1f4444d1ec351277e8b662cda81a5e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb88d2cfcb1f4444d1ec351277e8b662cda81a5e Author: Eric Anholt Date: Thu Apr 9 13:05:00 2015 -0700 vc4: Fix another space allocation mistake. We're over-allocating our BCL in vc4_draw.c, so this never mattered. However, new RCL-only blit support might end up here without having set up any BCL contents. --- src/gallium/drivers/vc4/vc4_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 1859dd6..9b2ee5c 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -349,6 +349,7 @@ vc4_flush(struct pipe_context *pctx) * unblocking the render thread. Note that this doesn't act until the * FLUSH completes. */ + cl_ensure_space(&vc4->bcl, 8); cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE); /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. 
*/ cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Use the blit interface for updating shadow textures. Message-ID: <20150413174118.E3C67761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: adae027260bedc7af73e5cc7a74af3cafa4ab460 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=adae027260bedc7af73e5cc7a74af3cafa4ab460 Author: Eric Anholt Date: Wed Apr 8 13:11:01 2015 -0700 vc4: Use the blit interface for updating shadow textures. This lets us plug in a better blit implementation and have it impact the shadow update, too. --- src/gallium/drivers/vc4/vc4_resource.c | 44 ++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index cbb334f..10e1d6c 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -651,20 +651,38 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, return; for (int i = 0; i <= shadow->base.b.last_level; i++) { - struct pipe_box box = { - .x = 0, - .y = 0, - .z = 0, - .width = u_minify(shadow->base.b.width0, i), - .height = u_minify(shadow->base.b.height0, i), - .depth = 1, + unsigned width = u_minify(shadow->base.b.width0, i); + unsigned height = u_minify(shadow->base.b.height0, i); + struct pipe_blit_info info = { + .dst = { + .resource = &shadow->base.b, + .level = i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = shadow->base.b.format, + }, + .src = { + .resource = &orig->base.b, + .level = view->u.tex.first_level + i, + .box = { + .x = 0, + .y = 0, + .z = 0, + .width = width, + .height = height, + .depth = 1, + }, + .format = orig->base.b.format, + }, + .mask = ~0, }; - - util_resource_copy_region(pctx, - 
&shadow->base.b, i, 0, 0, 0, - &orig->base.b, - view->u.tex.first_level + i, - &box); + pctx->blit(pctx, &info); } shadow->writes = orig->writes; From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Add missed accounting for the size of the semaphore. Message-ID: <20150413174118.B23F1761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8eb9304ee74b7f4a3ef9f8ac9cb04f3031a61ded URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8eb9304ee74b7f4a3ef9f8ac9cb04f3031a61ded Author: Eric Anholt Date: Mon Apr 6 15:15:37 2015 -0700 vc4: Add missed accounting for the size of the semaphore. This wouldn't have mattered except in the worst case scenario RCL setup. --- src/gallium/drivers/vc4/vc4_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index ed10f7a..1859dd6 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -118,10 +118,12 @@ vc4_setup_rcl(struct vc4_context *vc4) uint32_t tilecoords_size = 3; uint32_t branch_size = 5 + reloc_size; uint32_t color_store_size = 1; + uint32_t semaphore_size = 1; cl_ensure_space(&vc4->rcl, clear_size + config_size + loadstore_size + + semaphore_size + xtiles * ytiles * (loadstore_size * 4 + tilecoords_size * 3 + branch_size + From anholt at kemper.freedesktop.org Mon Apr 13 17:41:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 10:41:18 -0700 (PDT) Subject: Mesa (master): vc4: Sync with kernel changes to relax BCL versus RCL validation. 
Message-ID: <20150413174118.C8A1E761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 725620f21d19365d7a8a34d0c72694384c680afc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=725620f21d19365d7a8a34d0c72694384c680afc Author: Eric Anholt Date: Thu Apr 9 13:41:29 2015 -0700 vc4: Sync with kernel changes to relax BCL versus RCL validation. There was no reason to tie the two packets' values together. --- src/gallium/drivers/vc4/kernel/vc4_validate.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 568b625..6b73587 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -589,21 +589,6 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) exec->fb_width = *(uint16_t *)(untrusted + 4); exec->fb_height = *(uint16_t *)(untrusted + 6); - /* Make sure that the fb width/height matches the binning config -- we - * rely on being able to interchange these for various assertions. - * (Within a tile, loads and stores will be clipped to the - * width/height, but we allow load/storing to any binned tile). 
- */ - if (exec->fb_width <= (exec->bin_tiles_x - 1) * 64 || - exec->fb_width > exec->bin_tiles_x * 64 || - exec->fb_height <= (exec->bin_tiles_y - 1) * 64 || - exec->fb_height > exec->bin_tiles_y * 64) { - DRM_ERROR("bin config %dx%d doesn't match FB %dx%d\n", - exec->bin_tiles_x, exec->bin_tiles_y, - exec->fb_width, exec->fb_height); - return -EINVAL; - } - flags = *(uint16_t *)(untrusted + 8); if ((flags & VC4_RENDER_CONFIG_FORMAT_MASK) == VC4_RENDER_CONFIG_FORMAT_RGBA8888) { @@ -632,13 +617,9 @@ validate_tile_coordinates(VALIDATE_ARGS) uint8_t tile_x = *(uint8_t *)(untrusted + 0); uint8_t tile_y = *(uint8_t *)(untrusted + 1); - if (tile_x >= exec->bin_tiles_x || - tile_y >= exec->bin_tiles_y) { - DRM_ERROR("Tile coordinates %d,%d > bin config %d,%d\n", - tile_x, - tile_y, - exec->bin_tiles_x, - exec->bin_tiles_y); + if (tile_x * 64 >= exec->fb_width || tile_y * 64 >= exec->fb_height) { + DRM_ERROR("Tile coordinates %d,%d > render config %dx%d\n", + tile_x, tile_y, exec->fb_width, exec->fb_height); return -EINVAL; } From frohlich at kemper.freedesktop.org Mon Apr 13 18:34:00 2015 From: frohlich at kemper.freedesktop.org (Mathias Fröhlich) Date: Mon, 13 Apr 2015 11:34:00 -0700 (PDT) Subject: Mesa (master): i965: Flush batchbuffer containing the query on glQueryCounter. Message-ID: <20150413183400.B31D9761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1e1d5456ba3dff82301ad4bbdde2fb6e2f562fe3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e1d5456ba3dff82301ad4bbdde2fb6e2f562fe3 Author: Mathias Froehlich Date: Sun Apr 12 18:23:58 2015 +0200 i965: Flush batchbuffer containing the query on glQueryCounter. This change fixes a regression with timer queries introduced with commit 3eb6258. There the pending batchbuffer is flushed only if glEndQuery is executed. This present change adds such a flush to glQueryCounter which also schedules a value query just like glEndQuery does. The patch fixes GPU timer queries going mad from within osgviewer. 
Reviewed-by: Kenneth Graunke Signed-off-by: Mathias Froehlich Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/brw_queryobj.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 917a24f..667c900 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -472,6 +472,8 @@ brw_query_counter(struct gl_context *ctx, struct gl_query_object *q) drm_intel_bo_unreference(query->bo); query->bo = drm_intel_bo_alloc(brw->bufmgr, "timestamp query", 4096, 4096); brw_write_timestamp(brw, query->bo, 0); + + query->flushed = false; } /** From airlied at kemper.freedesktop.org Mon Apr 13 22:05:42 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Mon, 13 Apr 2015 15:05:42 -0700 (PDT) Subject: Mesa (master): st/mesa: convert sub image for cube map arrays to 2d arrays for upload Message-ID: <20150413220542.DAA88761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5ed79312ed99f3b141c35569b9767f82f5ba0a93 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ed79312ed99f3b141c35569b9767f82f5ba0a93 Author: Dave Airlie Date: Wed Apr 8 10:59:20 2015 +1000 st/mesa: convert sub image for cube map arrays to 2d arrays for upload Since we can subimage upload a number of cube map array layers, that aren't a complete cube map array, we should specify things as a 2D array and blit from that. Suggested by Ilia Mirkin as an alternate fix for texsubimage cube map array issues. seems to work just as well. 
Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_cb_texture.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 5c520b4..353f80d 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -738,6 +738,11 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, if (gl_target == GL_TEXTURE_CUBE_MAP) { gl_target = GL_TEXTURE_2D; } + /* TexSubImage can specify subsets of cube map array faces + * so we need to upload via 2D array instead */ + if (gl_target == GL_TEXTURE_CUBE_MAP_ARRAY) { + gl_target = GL_TEXTURE_2D_ARRAY; + } /* Initialize the source texture description. */ memset(&src_templ, 0, sizeof(src_templ)); From airlied at kemper.freedesktop.org Mon Apr 13 22:05:42 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Mon, 13 Apr 2015 15:05:42 -0700 (PDT) Subject: Mesa (master): st/mesa: align cube map arrays layers Message-ID: <20150413220542.E299F761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cc5860e40787b3afe36856674f028e830685271b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc5860e40787b3afe36856674f028e830685271b Author: Dave Airlie Date: Wed Apr 8 10:00:27 2015 +1000 st/mesa: align cube map arrays layers We create textures internally for texsubimage, and we use the values from sub image to create a new texture, however we don't align these to valid sizes, and cube map arrays must have an array size aligned to 6. This fixes texsubimage cube_map_array on CAYMAN at least, (it was causing GPU hang and bad values), it probably also fixes it on radeonsi and evergreen. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89957 Tested-by: Tom Stellard Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ca7c83c..de4a6eb 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -177,7 +177,7 @@ st_gl_texture_dims_to_pipe_dims(GLenum texture, *widthOut = widthIn; *heightOut = heightIn; *depthOut = 1; - *layersOut = depthIn; + *layersOut = util_align_npot(depthIn, 6); break; default: assert(0 && "Unexpected texture in st_gl_texture_dims_to_pipe_dims()"); From anholt at kemper.freedesktop.org Tue Apr 14 06:46:18 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:18 -0700 (PDT) Subject: Mesa (master): vc4: Add a bunch of type conversions. Message-ID: <20150414064618.F338B761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 84ebaff1b7f78cb47cd8eed5476f03c5c3d0e14b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=84ebaff1b7f78cb47cd8eed5476f03c5c3d0e14b Author: Eric Anholt Date: Wed Apr 1 15:35:13 2015 -0700 vc4: Add a bunch of type conversions. These are required to get piglit's idiv tests working. The unsigned<->float conversions are wrong, but are good enough to get piglit's small ranges of values working. 
--- src/gallium/drivers/vc4/vc4_program.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bcceb3c..ca2e81c 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -925,15 +925,27 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) case nir_op_fmax: *dest = qir_FMAX(c, src[0], src[1]); break; + case nir_op_f2i: + case nir_op_f2u: *dest = qir_FTOI(c, src[0]); break; case nir_op_i2f: + case nir_op_u2f: *dest = qir_ITOF(c, src[0]); break; case nir_op_b2f: *dest = qir_AND(c, src[0], qir_uniform_f(c, 1.0)); break; + case nir_op_b2i: + *dest = qir_AND(c, src[0], qir_uniform_ui(c, 1)); + break; + case nir_op_i2b: + case nir_op_f2b: + qir_SF(c, src[0]); + *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0)); + break; + case nir_op_iadd: *dest = qir_ADD(c, src[0], src[1]); break; From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Use NIR-level lowering for idiv. Message-ID: <20150414064619.08B32761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7fa2f2e36660afe9f50f652baa6d65903d3a9dea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7fa2f2e36660afe9f50f652baa6d65903d3a9dea Author: Eric Anholt Date: Mon Apr 13 14:12:59 2015 -0700 vc4: Use NIR-level lowering for idiv. This fixes the idiv tests in piglit. 
--- src/gallium/drivers/vc4/vc4_program.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index ca2e81c..ec649c9 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -311,14 +311,6 @@ ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1) qir_uniform_ui(c, 24))); } -static struct qreg -ntq_idiv(struct vc4_compile *c, struct qreg src0, struct qreg src1) -{ - return qir_FTOI(c, qir_FMUL(c, - qir_ITOF(c, src0), - qir_RCP(c, qir_ITOF(c, src1)))); -} - static void ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) { @@ -983,9 +975,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) case nir_op_imul: *dest = ntq_umul(c, src[0], src[1]); break; - case nir_op_idiv: - *dest = ntq_idiv(c, src[0], src[1]); - break; case nir_op_seq: qir_SF(c, qir_FSUB(c, src[0], src[1])); @@ -2096,6 +2085,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, c->s = tgsi_to_nir(tokens, &nir_options); nir_opt_global_to_local(c->s); nir_convert_to_ssa(c->s); + nir_lower_idiv(c->s); vc4_optimize_nir(c->s); From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Fix off-by-one in branch target validation. Message-ID: <20150414064619.1267A761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d04b07f8e2eb61bb389f2d6b8ed0a501952466ee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d04b07f8e2eb61bb389f2d6b8ed0a501952466ee Author: Eric Anholt Date: Thu Apr 9 13:43:55 2015 -0700 vc4: Fix off-by-one in branch target validation. 
--- src/gallium/drivers/vc4/kernel/vc4_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 6b73587..aeac29e 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -287,7 +287,7 @@ validate_branch_to_sublist(VALIDATE_ARGS) offset = *(uint32_t *)(untrusted + 0); if (offset % exec->tile_alloc_init_block_size || - offset / exec->tile_alloc_init_block_size > + offset / exec->tile_alloc_init_block_size >= exec->bin_tiles_x * exec->bin_tiles_y) { DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " "tile allocation space.\n"); From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Separate out a bit of code for submitting jobs to the kernel. Message-ID: <20150414064619.229AA761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e214a596352e67c89ce379a1e5a060dbc1ce31e1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e214a596352e67c89ce379a1e5a060dbc1ce31e1 Author: Eric Anholt Date: Mon Apr 6 12:58:51 2015 -0700 vc4: Separate out a bit of code for submitting jobs to the kernel. I want to be able to have multiple jobs being set up at the same time (for example, a render job to do a little fixup blit in the course of doing a render to the main FBO). 
--- src/gallium/drivers/vc4/Makefile.sources | 1 + src/gallium/drivers/vc4/vc4_context.c | 93 +-------------------- src/gallium/drivers/vc4/vc4_context.h | 3 + src/gallium/drivers/vc4/vc4_job.c | 132 ++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 90 deletions(-) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index ec0f25c..62cd0e0 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -11,6 +11,7 @@ C_SOURCES := \ vc4_emit.c \ vc4_fence.c \ vc4_formats.c \ + vc4_job.c \ vc4_opt_algebraic.c \ vc4_opt_constant_folding.c \ vc4_opt_copy_propagation.c \ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index daa5ba5..b394c18 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -296,40 +296,6 @@ vc4_setup_rcl(struct vc4_context *vc4) ztex->writes++; } -static void -vc4_draw_reset(struct vc4_context *vc4) -{ - struct vc4_bo **referenced_bos = vc4->bo_pointers.base; - for (int i = 0; i < (vc4->bo_handles.next - - vc4->bo_handles.base) / 4; i++) { - vc4_bo_unreference(&referenced_bos[i]); - } - vc4_reset_cl(&vc4->bcl); - vc4_reset_cl(&vc4->rcl); - vc4_reset_cl(&vc4->shader_rec); - vc4_reset_cl(&vc4->uniforms); - vc4_reset_cl(&vc4->bo_handles); - vc4_reset_cl(&vc4->bo_pointers); - vc4->shader_rec_count = 0; - - vc4->needs_flush = false; - vc4->draw_call_queued = false; - - /* We have no hardware context saved between our draw calls, so we - * need to flag the next draw as needing all state emitted. Emitting - * all state at the start of our draws is also what ensures that we - * return to the state we need after a previous tile has finished. 
- */ - vc4->dirty = ~0; - vc4->resolve = 0; - vc4->cleared = 0; - - vc4->draw_min_x = ~0; - vc4->draw_min_y = ~0; - vc4->draw_max_x = 0; - vc4->draw_max_y = 0; -} - void vc4_flush(struct pipe_context *pctx) { @@ -343,7 +309,7 @@ vc4_flush(struct pipe_context *pctx) */ if (vc4->draw_max_x <= vc4->draw_min_x || vc4->draw_max_y <= vc4->draw_min_y) { - vc4_draw_reset(vc4); + vc4_job_reset(vc4); return; } @@ -358,54 +324,7 @@ vc4_flush(struct pipe_context *pctx) vc4_setup_rcl(vc4); - if (vc4_debug & VC4_DEBUG_CL) { - fprintf(stderr, "BCL:\n"); - vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false); - fprintf(stderr, "RCL:\n"); - vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true); - } - - struct drm_vc4_submit_cl submit; - memset(&submit, 0, sizeof(submit)); - - submit.bo_handles = (uintptr_t)vc4->bo_handles.base; - submit.bo_handle_count = (vc4->bo_handles.next - - vc4->bo_handles.base) / 4; - submit.bin_cl = (uintptr_t)vc4->bcl.base; - submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base; - submit.render_cl = (uintptr_t)vc4->rcl.base; - submit.render_cl_size = vc4->rcl.next - vc4->rcl.base; - submit.shader_rec = (uintptr_t)vc4->shader_rec.base; - submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base; - submit.shader_rec_count = vc4->shader_rec_count; - submit.uniforms = (uintptr_t)vc4->uniforms.base; - submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base; - - if (!(vc4_debug & VC4_DEBUG_NORAST)) { - int ret; - -#ifndef USE_VC4_SIMULATOR - ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); -#else - ret = vc4_simulator_flush(vc4, &submit); -#endif - if (ret) { - fprintf(stderr, "VC4 submit failed\n"); - abort(); - } - } - - vc4->last_emit_seqno = submit.seqno; - - if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) { - if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno, - PIPE_TIMEOUT_INFINITE)) { - fprintf(stderr, "Wait failed.\n"); - abort(); - } - } - - vc4_draw_reset(vc4); + vc4_job_submit(vc4); } static void @@ -532,13 
+451,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) vc4_query_init(pctx); vc4_resource_context_init(pctx); - vc4_init_cl(vc4, &vc4->bcl); - vc4_init_cl(vc4, &vc4->rcl); - vc4_init_cl(vc4, &vc4->shader_rec); - vc4_init_cl(vc4, &vc4->uniforms); - vc4_init_cl(vc4, &vc4->bo_handles); - vc4_init_cl(vc4, &vc4->bo_pointers); - vc4_draw_reset(vc4); + vc4_job_init(vc4); vc4->fd = screen->fd; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index fa1cc43..72d655e 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -326,6 +326,9 @@ void vc4_write_uniforms(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate); void vc4_flush(struct pipe_context *pctx); +void vc4_job_init(struct vc4_context *vc4); +void vc4_job_submit(struct vc4_context *vc4); +void vc4_job_reset(struct vc4_context *vc4); bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo); void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c new file mode 100644 index 0000000..7603716 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -0,0 +1,132 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file vc4_job.c + * + * Functions for submitting VC4 render jobs to the kernel. + */ + +#include +#include "vc4_context.h" + +void +vc4_job_init(struct vc4_context *vc4) +{ + vc4_init_cl(vc4, &vc4->bcl); + vc4_init_cl(vc4, &vc4->rcl); + vc4_init_cl(vc4, &vc4->shader_rec); + vc4_init_cl(vc4, &vc4->uniforms); + vc4_init_cl(vc4, &vc4->bo_handles); + vc4_init_cl(vc4, &vc4->bo_pointers); + vc4_job_reset(vc4); +} + +void +vc4_job_reset(struct vc4_context *vc4) +{ + struct vc4_bo **referenced_bos = vc4->bo_pointers.base; + for (int i = 0; i < (vc4->bo_handles.next - + vc4->bo_handles.base) / 4; i++) { + vc4_bo_unreference(&referenced_bos[i]); + } + vc4_reset_cl(&vc4->bcl); + vc4_reset_cl(&vc4->rcl); + vc4_reset_cl(&vc4->shader_rec); + vc4_reset_cl(&vc4->uniforms); + vc4_reset_cl(&vc4->bo_handles); + vc4_reset_cl(&vc4->bo_pointers); + vc4->shader_rec_count = 0; + + vc4->needs_flush = false; + vc4->draw_call_queued = false; + + /* We have no hardware context saved between our draw calls, so we + * need to flag the next draw as needing all state emitted. Emitting + * all state at the start of our draws is also what ensures that we + * return to the state we need after a previous tile has finished. + */ + vc4->dirty = ~0; + vc4->resolve = 0; + vc4->cleared = 0; + + vc4->draw_min_x = ~0; + vc4->draw_min_y = ~0; + vc4->draw_max_x = 0; + vc4->draw_max_y = 0; +} + +/** + * Submits the job to the kernel and then reinitializes it. 
+ */ +void +vc4_job_submit(struct vc4_context *vc4) +{ + if (vc4_debug & VC4_DEBUG_CL) { + fprintf(stderr, "BCL:\n"); + vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false); + fprintf(stderr, "RCL:\n"); + vc4_dump_cl(vc4->rcl.base, vc4->rcl.next - vc4->rcl.base, true); + } + + struct drm_vc4_submit_cl submit; + memset(&submit, 0, sizeof(submit)); + + submit.bo_handles = (uintptr_t)vc4->bo_handles.base; + submit.bo_handle_count = (vc4->bo_handles.next - + vc4->bo_handles.base) / 4; + submit.bin_cl = (uintptr_t)vc4->bcl.base; + submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base; + submit.render_cl = (uintptr_t)vc4->rcl.base; + submit.render_cl_size = vc4->rcl.next - vc4->rcl.base; + submit.shader_rec = (uintptr_t)vc4->shader_rec.base; + submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base; + submit.shader_rec_count = vc4->shader_rec_count; + submit.uniforms = (uintptr_t)vc4->uniforms.base; + submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base; + + if (!(vc4_debug & VC4_DEBUG_NORAST)) { + int ret; + +#ifndef USE_VC4_SIMULATOR + ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit); +#else + ret = vc4_simulator_flush(vc4, &submit); +#endif + if (ret) { + fprintf(stderr, "VC4 submit failed\n"); + abort(); + } + } + + vc4->last_emit_seqno = submit.seqno; + + if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) { + if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno, + PIPE_TIMEOUT_INFINITE)) { + fprintf(stderr, "Wait failed.\n"); + abort(); + } + } + + vc4_job_reset(vc4); +} From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Move the blit code to a separate file. 
Message-ID: <20150414064619.2DAD6761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 43b20795b742b9f1608dd6f2dc586337408760ad URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43b20795b742b9f1608dd6f2dc586337408760ad Author: Eric Anholt Date: Thu Apr 9 12:12:20 2015 -0700 vc4: Move the blit code to a separate file. There will be other blit code showing up, and it seems like the place you'd look. --- src/gallium/drivers/vc4/Makefile.sources | 1 + src/gallium/drivers/vc4/vc4_blit.c | 90 ++++++++++++++++++++++++++++++ src/gallium/drivers/vc4/vc4_context.h | 1 + src/gallium/drivers/vc4/vc4_resource.c | 64 --------------------- 4 files changed, 92 insertions(+), 64 deletions(-) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 62cd0e0..49474df 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -1,4 +1,5 @@ C_SOURCES := \ + vc4_blit.c \ vc4_bufmgr.c \ vc4_bufmgr.h \ vc4_cl.c \ diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c new file mode 100644 index 0000000..5c98fb6 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -0,0 +1,90 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "vc4_context.h" + +static bool +vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) +{ + struct vc4_context *vc4 = vc4_context(ctx); + + if (!util_blitter_is_blit_supported(vc4->blitter, info)) { + fprintf(stderr, "blit unsupported %s -> %s", + util_format_short_name(info->src.resource->format), + util_format_short_name(info->dst.resource->format)); + return false; + } + + util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb); + util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx); + util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs); + util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer); + util_blitter_save_viewport(vc4->blitter, &vc4->viewport); + util_blitter_save_scissor(vc4->blitter, &vc4->scissor); + util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.bind_fs); + util_blitter_save_blend(vc4->blitter, vc4->blend); + util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa); + util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref); + util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask); + util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer); + util_blitter_save_fragment_sampler_states(vc4->blitter, + vc4->fragtex.num_samplers, + (void **)vc4->fragtex.samplers); + util_blitter_save_fragment_sampler_views(vc4->blitter, + vc4->fragtex.num_textures, vc4->fragtex.textures); + + 
util_blitter_blit(vc4->blitter, info); + + return true; +} + +/* Optimal hardware path for blitting pixels. + * Scaling, format conversion, up- and downsampling (resolve) are allowed. + */ +void +vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) +{ + struct pipe_blit_info info = *blit_info; + + if (info.src.resource->nr_samples > 1 && + info.dst.resource->nr_samples <= 1 && + !util_format_is_depth_or_stencil(info.src.resource->format) && + !util_format_is_pure_integer(info.src.resource->format)) { + fprintf(stderr, "color resolve unimplemented"); + return; + } + + if (util_try_blit_via_copy_region(pctx, &info)) { + return; /* done */ + } + + if (info.mask & PIPE_MASK_S) { + fprintf(stderr, "cannot blit stencil, skipping"); + info.mask &= ~PIPE_MASK_S; + } + + vc4_render_blit(pctx, &info); +} diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 72d655e..68eacb8 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -341,4 +341,5 @@ bool vc4_tex_format_supported(enum pipe_format f); uint8_t vc4_get_tex_format(enum pipe_format f); const uint8_t *vc4_get_format_swizzle(enum pipe_format f); void vc4_init_query_functions(struct vc4_context *vc4); +void vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); #endif /* VC4_CONTEXT_H */ diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 10e1d6c..f6ca075 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -26,7 +26,6 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_surface.h" -#include "util/u_blitter.h" #include "vc4_screen.h" #include "vc4_context.h" @@ -576,69 +575,6 @@ vc4_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource) */ } -static bool -render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) -{ - struct vc4_context *vc4 = 
vc4_context(ctx); - - if (!util_blitter_is_blit_supported(vc4->blitter, info)) { - fprintf(stderr, "blit unsupported %s -> %s", - util_format_short_name(info->src.resource->format), - util_format_short_name(info->dst.resource->format)); - return false; - } - - util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb); - util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx); - util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs); - util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer); - util_blitter_save_viewport(vc4->blitter, &vc4->viewport); - util_blitter_save_scissor(vc4->blitter, &vc4->scissor); - util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.bind_fs); - util_blitter_save_blend(vc4->blitter, vc4->blend); - util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa); - util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref); - util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask); - util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer); - util_blitter_save_fragment_sampler_states(vc4->blitter, - vc4->fragtex.num_samplers, - (void **)vc4->fragtex.samplers); - util_blitter_save_fragment_sampler_views(vc4->blitter, - vc4->fragtex.num_textures, vc4->fragtex.textures); - - util_blitter_blit(vc4->blitter, info); - - return true; -} - -/* Optimal hardware path for blitting pixels. - * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
- */ -static void -vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) -{ - struct pipe_blit_info info = *blit_info; - - if (info.src.resource->nr_samples > 1 && - info.dst.resource->nr_samples <= 1 && - !util_format_is_depth_or_stencil(info.src.resource->format) && - !util_format_is_pure_integer(info.src.resource->format)) { - fprintf(stderr, "color resolve unimplemented"); - return; - } - - if (util_try_blit_via_copy_region(pctx, &info)) { - return; /* done */ - } - - if (info.mask & PIPE_MASK_S) { - fprintf(stderr, "cannot blit stencil, skipping"); - info.mask &= ~PIPE_MASK_S; - } - - render_blit(pctx, &info); -} - void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *view) From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Add a blitter path using just the render thread. Message-ID: <20150414064619.40282761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1be329e64cd035e3ee088cff3a50d39e1ad66868 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1be329e64cd035e3ee088cff3a50d39e1ad66868 Author: Eric Anholt Date: Mon Apr 6 15:12:58 2015 -0700 vc4: Add a blitter path using just the render thread. This accelerates the path for generating the shadow tiled texture when asked to sample from a raster texture (typical in glamor). 
--- src/gallium/drivers/vc4/vc4_blit.c | 127 ++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index 5c98fb6..4f87189 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -26,6 +26,130 @@ #include "util/u_blitter.h" #include "vc4_context.h" +static void +vc4_tile_blit_color_rcl(struct vc4_context *vc4, + struct vc4_surface *dst_surf, + struct vc4_surface *src_surf) +{ + struct vc4_resource *src = vc4_resource(src_surf->base.texture); + struct vc4_resource *dst = vc4_resource(dst_surf->base.texture); + + uint32_t min_x_tile = 0; + uint32_t min_y_tile = 0; + uint32_t max_x_tile = (dst_surf->base.width - 1) / 64; + uint32_t max_y_tile = (dst_surf->base.height - 1) / 64; + uint32_t xtiles = max_x_tile - min_x_tile + 1; + uint32_t ytiles = max_y_tile - min_y_tile + 1; + uint32_t reloc_size = 9; + uint32_t config_size = 11 + reloc_size; + uint32_t loadstore_size = 7 + reloc_size; + uint32_t tilecoords_size = 3; + cl_ensure_space(&vc4->rcl, + config_size + + xtiles * ytiles * (loadstore_size * 2 + + tilecoords_size * 1)); + cl_ensure_space(&vc4->bo_handles, 2 * sizeof(uint32_t)); + cl_ensure_space(&vc4->bo_pointers, 2 * sizeof(struct vc4_bo *)); + + cl_start_reloc(&vc4->rcl, 1); + cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + cl_reloc(vc4, &vc4->rcl, dst->bo, dst_surf->offset); + cl_u16(&vc4->rcl, dst_surf->base.width); + cl_u16(&vc4->rcl, dst_surf->base.height); + cl_u16(&vc4->rcl, ((dst_surf->tiling << + VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) | + (vc4_rt_format_is_565(dst_surf->base.format) ? 
+ VC4_RENDER_CONFIG_FORMAT_BGR565 : + VC4_RENDER_CONFIG_FORMAT_RGBA8888))); + + uint32_t src_hindex = vc4_gem_hindex(vc4, src->bo); + + for (int y = min_y_tile; y <= max_y_tile; y++) { + for (int x = min_x_tile; x <= max_x_tile; x++) { + bool end_of_frame = (x == max_x_tile && + y == max_y_tile); + + cl_start_reloc(&vc4->rcl, 1); + cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + cl_u8(&vc4->rcl, + VC4_LOADSTORE_TILE_BUFFER_COLOR | + (src_surf->tiling << + VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); + cl_u8(&vc4->rcl, + vc4_rt_format_is_565(src_surf->base.format) ? + VC4_LOADSTORE_TILE_BUFFER_BGR565 : + VC4_LOADSTORE_TILE_BUFFER_RGBA8888); + cl_reloc_hindex(&vc4->rcl, src_hindex, + src_surf->offset); + + cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES); + cl_u8(&vc4->rcl, x); + cl_u8(&vc4->rcl, y); + + if (end_of_frame) { + cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + } else { + cl_u8(&vc4->rcl, + VC4_PACKET_STORE_MS_TILE_BUFFER); + } + } + } + + vc4->draw_min_x = 0; + vc4->draw_min_y = 0; + vc4->draw_max_x = dst_surf->base.width; + vc4->draw_max_y = dst_surf->base.height; + + dst->writes++; + vc4->needs_flush = true; +} + +static struct vc4_surface * +vc4_get_blit_surface(struct pipe_context *pctx, + struct pipe_resource *prsc, unsigned level) +{ + struct pipe_surface tmpl; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.format = prsc->format; + tmpl.u.tex.level = level; + tmpl.u.tex.first_layer = 0; + tmpl.u.tex.last_layer = 0; + + return vc4_surface(pctx->create_surface(pctx, prsc, &tmpl)); +} + +static bool +vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) +{ + struct vc4_context *vc4 = vc4_context(pctx); + + if ((info->mask & PIPE_MASK_RGBA) == 0) + return false; + + if (info->dst.box.x != 0 || info->dst.box.y != 0 || + info->src.box.x != 0 || info->src.box.y != 0 || + info->dst.box.width != info->src.box.width || + info->dst.box.height != info->src.box.height) { + return false; + } + + struct vc4_surface *dst_surf = 
+ vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); + struct vc4_surface *src_surf = + vc4_get_blit_surface(pctx, info->src.resource, info->src.level); + + vc4_flush(pctx); + vc4_tile_blit_color_rcl(vc4, dst_surf, src_surf); + vc4_job_submit(vc4); + + pctx->surface_destroy(pctx, &dst_surf->base); + pctx->surface_destroy(pctx, &src_surf->base); + + return true; +} + static bool vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) { @@ -77,6 +201,9 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) return; } + if (vc4_tile_blit(pctx, blit_info)) + return; + if (util_try_blit_via_copy_region(pctx, &info)) { return; /* done */ } From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: When asked to sample from a raster texture, make a shadow tiled copy. Message-ID: <20150414064619.19BF9761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 44b63cf5c051f7eccfc1d7427247fd58dabb7761 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=44b63cf5c051f7eccfc1d7427247fd58dabb7761 Author: Eric Anholt Date: Wed Apr 8 12:49:24 2015 -0700 vc4: When asked to sample from a raster texture, make a shadow tiled copy. So, it turns out my simulator doesn't *quite* match the hardware. And the errata about raster textures tells you most of what's wrong, but there's still stuff wrong after that. Instead, if we're asked to sample from raster, we'll just blit it to a tiled temporary. Raster textures should only be screen scanout, and word is that it's faster to copy to tiled using the tiling engine first than to texture from an entire raster texture, anyway. 
--- src/gallium/drivers/vc4/vc4_state.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 332f310..df75b6e 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -516,6 +516,7 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) { struct pipe_sampler_view *so = malloc(sizeof(*so)); + struct vc4_resource *rsc = vc4_resource(prsc); if (!so) return NULL; @@ -527,8 +528,12 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, /* There is no hardware level clamping, and the start address of a * texture may be misaligned, so in that case we have to copy to a * temporary. + * + * Also, Raspberry Pi doesn't support sampling from raster textures, + * so we also have to copy to a temporary then. */ - if (so->u.tex.first_level) { + if (so->u.tex.first_level || + rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) { struct vc4_resource *shadow_parent = vc4_resource(prsc); struct pipe_resource tmpl = shadow_parent->base.b; struct vc4_resource *clone; @@ -574,8 +579,10 @@ vc4_set_sampler_views(struct pipe_context *pctx, unsigned shader, for (i = 0; i < nr; i++) { if (views[i]) { + struct vc4_resource *rsc = + vc4_resource(views[i]->texture); new_nr = i + 1; - if (views[i]->u.tex.first_level != 0) + if (rsc->shadow_parent) vc4_update_shadow_baselevel_texture(pctx, views[i]); } pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); From anholt at kemper.freedesktop.org Tue Apr 14 06:46:19 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Mon, 13 Apr 2015 23:46:19 -0700 (PDT) Subject: Mesa (master): vc4: Allow submitting jobs with no bin CL in validation. 
Message-ID: <20150414064619.36552761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 76d56752ccff5bca3a0808705d5da76f186afb33 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76d56752ccff5bca3a0808705d5da76f186afb33 Author: Eric Anholt Date: Thu Apr 9 13:35:57 2015 -0700 vc4: Allow submitting jobs with no bin CL in validation. For blitting, we want to fire off an RCL-only job. This takes a bit of tweaking in our validation and the simulator support (and corresponding new code in the kernel). --- src/gallium/drivers/vc4/kernel/vc4_drv.h | 1 + src/gallium/drivers/vc4/kernel/vc4_gem.c | 2 ++ src/gallium/drivers/vc4/kernel/vc4_validate.c | 9 ++++++--- src/gallium/drivers/vc4/vc4_simulator.c | 18 ++++++++++-------- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 12a3cef..325f944 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -162,6 +162,7 @@ vc4_validate_cl(struct drm_device *dev, void *unvalidated, uint32_t len, bool is_bin, + bool has_bin, struct vc4_exec_info *exec); int diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c index c9a7573..ac29ab3 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_gem.c +++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c @@ -130,6 +130,7 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) bin, args->bin_cl_size, true, + args->bin_cl_size != 0, exec); if (ret) goto fail; @@ -139,6 +140,7 @@ vc4_cl_validate(struct drm_device *dev, struct vc4_exec_info *exec) render, args->render_cl_size, false, + args->bin_cl_size != 0, exec); if (ret) goto fail; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index aeac29e..2d04a4a 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -702,6 +702,7 @@ 
vc4_validate_cl(struct drm_device *dev, void *unvalidated, uint32_t len, bool is_bin, + bool has_bin, struct vc4_exec_info *exec) { uint32_t dst_offset = 0; @@ -772,7 +773,7 @@ vc4_validate_cl(struct drm_device *dev, if (is_bin) { exec->ct0ea = exec->ct0ca + dst_offset; - if (!exec->found_start_tile_binning_packet) { + if (has_bin && !exec->found_start_tile_binning_packet) { DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); return -EINVAL; } @@ -786,8 +787,10 @@ vc4_validate_cl(struct drm_device *dev, * increment from the bin CL. Otherwise a later submit would * have render execute immediately. */ - if (!exec->found_wait_on_semaphore_packet) { - DRM_ERROR("Render CL missing VC4_PACKET_WAIT_ON_SEMAPHORE\n"); + if (exec->found_wait_on_semaphore_packet != has_bin) { + DRM_ERROR("Render CL %s VC4_PACKET_WAIT_ON_SEMAPHORE\n", + exec->found_wait_on_semaphore_packet ? + "has" : "missing"); return -EINVAL; } exec->ct1ea = exec->ct1ca + dst_offset; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index cd8cc5b..2f72e72 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -151,14 +151,16 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) if (ret) return ret; - int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); - if (bfc != 1) { - fprintf(stderr, "Binning returned %d flushes, should be 1.\n", - bfc); - fprintf(stderr, "Relocated binning command list:\n"); - vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca, - exec.ct0ea - exec.ct0ca, false); - abort(); + if (exec.ct0ca != exec.ct0ea) { + int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); + if (bfc != 1) { + fprintf(stderr, "Binning returned %d flushes, should be 1.\n", + bfc); + fprintf(stderr, "Relocated binning command list:\n"); + vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca, + exec.ct0ea - exec.ct0ca, false); + abort(); + } } int rfc = 
simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); if (rfc != 1) { From kwg at kemper.freedesktop.org Tue Apr 14 08:49:23 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 14 Apr 2015 01:49:23 -0700 (PDT) Subject: Mesa (master): i965: Fix INTEL_DEBUG=shader_time for SIMD8 VS. Message-ID: <20150414084923.E8493761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 21d29124a719bdaf5794859a4a7441cc6be33df7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21d29124a719bdaf5794859a4a7441cc6be33df7 Author: Kenneth Graunke Date: Sun Apr 12 03:52:39 2015 -0700 i965: Fix INTEL_DEBUG=shader_time for SIMD8 VS. In commit 4ebeb71573ad44f7657810dc5dd2c9030e3e63db, I deleted the emit_shader_time_end() call in emit_urb_writes(). But I failed to add it to run_vs(), as I intended. So no data was recorded at all. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c475b2..8bd8da2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3877,6 +3877,9 @@ fs_visitor::run_vs() emit_urb_writes(); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + calculate_cfg(); optimize(); From kwg at kemper.freedesktop.org Tue Apr 14 08:49:23 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 14 Apr 2015 01:49:23 -0700 (PDT) Subject: Mesa (master): i965: Implement proper workaround for Gen4 GPU CONSTANT_BUFFER hangs. Message-ID: <20150414084923.F1931761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f55ded764ce60f87463e33bfa3a32e2c44715581 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f55ded764ce60f87463e33bfa3a32e2c44715581 Author: Kenneth Graunke Date: Sun Apr 12 18:39:07 2015 -0700 i965: Implement proper workaround for Gen4 GPU CONSTANT_BUFFER hangs. 
I finally managed to dig up some information on our mysterious GPU hangs. A wiki page from the Crestline validation team mentions that they found a GPU hang in "Serious Sam 2" (on Windows) with remarkably similar conditions to the ones we've seen in Google Chrome and glmark2. Apparently, if WM_STATE has "PS Use Source Depth" enabled, CC_STATE has most depth state disabled, and you issue a CONSTANT_BUFFER command and immediately draw, the depth interpolator makes a small mistake that leads to hangs. Most of the traces I looked at contained a CONSTANT_BUFFER packet immediately followed by 3DPRIMITIVE, or at least very few packets. It appears they also have "PS Use Source Depth" enabled - either at the hang, or a little before it. So I think this is our bug. The workaround is to emit a non-pipelined state packet after issuing a CONSTANT_BUFFER packet. This is really similar to the workaround I developed in commit c4fd0c9052dd391d6f2e9bb8e6da209dfc7ef35b. v2: Fix word-wrapping issues. Signed-off-by: Kenneth Graunke Reviewed-by: Francisco Jerez Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_curbe.c | 39 ++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index e45e2ab..befd7a9 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -285,19 +285,6 @@ brw_upload_constant_buffer(struct brw_context *brw) */ emit: - /* Work around mysterious 965 hangs that appear to happen if you do - * two 3DPRIMITIVEs with only a CONSTANT_BUFFER inbetween. If we - * haven't already flushed for some other reason, explicitly do so. - * - * We've found no documented reason why this should be necessary. 
- */ - if (brw->gen == 4 && !brw->is_g4x && - (brw->ctx.NewDriverState & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) { - BEGIN_BATCH(1); - OUT_BATCH(MI_FLUSH); - ADVANCE_BATCH(); - } - /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8 * (CONSTANT_BUFFER (CURBE Load)): * @@ -317,6 +304,31 @@ emit: (brw->curbe.total_size - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); + + /* Work around a Broadwater/Crestline depth interpolator bug. The + * following sequence will cause GPU hangs: + * + * 1. Change state so that all depth related fields in CC_STATE are + * disabled, and in WM_STATE, only "PS Use Source Depth" is enabled. + * 2. Emit a CONSTANT_BUFFER packet. + * 3. Draw via 3DPRIMITIVE. + * + * The recommended workaround is to emit a non-pipelined state change after + * emitting CONSTANT_BUFFER, in order to drain the windowizer pipeline. + * + * We arbitrarily choose 3DSTATE_GLOBAL_DEPTH_CLAMP_OFFSET (as it's small), + * and always emit it when "PS Use Source Depth" is set. We could be more + * precise, but the additional complexity is probably not worth it. + * + * BRW_NEW_FRAGMENT_PROGRAM + */ + if (brw->gen == 4 && !brw->is_g4x && + (brw->fragment_program->Base.InputsRead & (1 << VARYING_SLOT_POS))) { + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + } } const struct brw_tracked_state brw_constant_buffer = { @@ -324,6 +336,7 @@ const struct brw_tracked_state brw_constant_buffer = { .mesa = _NEW_PROGRAM_CONSTANTS, .brw = BRW_NEW_BATCH | BRW_NEW_CURBE_OFFSETS | + BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ BRW_NEW_URB_FENCE | From kwg at kemper.freedesktop.org Tue Apr 14 08:49:24 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 14 Apr 2015 01:49:24 -0700 (PDT) Subject: Mesa (master): i965: Fix software primitive restart with indirect draws. 
Message-ID: <20150414084924.087B1761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 406df68736a213f17f21a38a7c2da4ea15acd053 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=406df68736a213f17f21a38a7c2da4ea15acd053 Author: Kenneth Graunke Date: Sat Apr 11 02:21:48 2015 -0700 i965: Fix software primitive restart with indirect draws. new_prim was declared as a stack variable within a nested scope; we tried to retain a pointer to that data beyond the scope, which is bogus. GCC with -O1 eliminated most of the code that set new_prim's fields. Move the declaration to fix the bug. v2: Also fix new_ib (thanks to Matt Turner and Ben Widawsky). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81025 Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Ben Widawsky Cc: mesa-stable at lists.freedesktop.org --- src/mesa/vbo/vbo_primitive_restart.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/vbo/vbo_primitive_restart.c b/src/mesa/vbo/vbo_primitive_restart.c index 562dedc..dafc4fd 100644 --- a/src/mesa/vbo/vbo_primitive_restart.c +++ b/src/mesa/vbo/vbo_primitive_restart.c @@ -167,6 +167,8 @@ vbo_sw_primitive_restart(struct gl_context *ctx, struct gl_buffer_object *indirect) { GLuint prim_num; + struct _mesa_prim new_prim; + struct _mesa_index_buffer new_ib; struct sub_primitive *sub_prims; struct sub_primitive *sub_prim; GLuint num_sub_prims; @@ -182,8 +184,6 @@ vbo_sw_primitive_restart(struct gl_context *ctx, /* If there is an indirect buffer, map it and extract the draw params */ if (indirect && prims[0].is_indirect) { - struct _mesa_prim new_prim = *prims; - struct _mesa_index_buffer new_ib = *ib; const uint32_t *indirect_params; if (!ctx->Driver.MapBufferRange(ctx, 0, indirect->Size, GL_MAP_READ_BIT, indirect, MAP_INTERNAL)) { @@ -195,6 +195,7 @@ vbo_sw_primitive_restart(struct gl_context *ctx, } assert(nr_prims == 1); + new_prim = prims[0]; indirect_params = (const uint32_t *) 
ADD_POINTERS(indirect->Mappings[MAP_INTERNAL].Pointer, new_prim.indirect_offset); @@ -206,6 +207,7 @@ vbo_sw_primitive_restart(struct gl_context *ctx, new_prim.basevertex = indirect_params[3]; new_prim.base_instance = indirect_params[4]; + new_ib = *ib; new_ib.count = new_prim.count; prims = &new_prim; From tstellar at kemper.freedesktop.org Tue Apr 14 14:45:23 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Tue, 14 Apr 2015 07:45:23 -0700 (PDT) Subject: Mesa (master): radeonsi: Add header and footer to shader stat dump Message-ID: <20150414144523.84113761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2569c7109d466137ac2b9964db3427ccf2bbf49e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2569c7109d466137ac2b9964db3427ccf2bbf49e Author: Tom Stellard Date: Mon Apr 13 12:56:37 2015 +0000 radeonsi: Add header and footer to shader stat dump This makes it easier to parse. Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/si_shader.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b4709ac..89f02ab 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2669,8 +2669,10 @@ int si_shader_binary_read(struct si_screen *sscreen, binary->code[i]); } } - fprintf(stderr, "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" - "Scratch: %d bytes per wave\n", + + fprintf(stderr, "*** SHADER STATS ***\n" + "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n" + "Scratch: %d bytes per wave\n********************\n", shader->num_sgprs, shader->num_vgprs, binary->code_size, shader->lds_size, shader->scratch_bytes_per_wave); } From tstellar at kemper.freedesktop.org Tue Apr 14 14:45:23 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Tue, 14 Apr 2015 07:45:23 -0700 (PDT) Subject: Mesa (master): radeon/llvm: Run LLVM's instruction combining pass 
Message-ID: <20150414144523.8D0AE761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c6d79ed289a75f13c65f011be870f7e43a0fedc7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6d79ed289a75f13c65f011be870f7e43a0fedc7 Author: Tom Stellard Date: Fri Apr 10 17:07:16 2015 +0000 radeon/llvm: Run LLVM's instruction combining pass This should improve code quality in general and will help with some future changes to how we emit kill instructions. shader-db shows a few regressions, but these don't seem to be the result of deficiencies in instcombine. They're mostly caused by the scheduler making different decisions than before. shader-db stats (bonaire): 979 shaders Totals: SGPRS: 35056 -> 34872 (-0.52 %) VGPRS: 20624 -> 20696 (0.35 %) Code Size: 764372 -> 749032 (-2.01 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 12288 -> 12288 (0.00 %) bytes per wave Totals from affected shaders: SGPRS: 13264 -> 13072 (-1.45 %) VGPRS: 8248 -> 8316 (0.82 %) Code Size: 486320 -> 470992 (-3.15 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 11264 -> 11264 (0.00 %) bytes per wave Increases: SGPRS: 6 (0.01 %) VGPRS: 20 (0.02 %) Code Size: 14 (0.01 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) Decreases: SGPRS: 32 (0.03 %) VGPRS: 8 (0.01 %) Code Size: 244 (0.25 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) *** BY PERCENTAGE *** Max Increase: SGPRS: 32 -> 48 (50.00 %) VGPRS: 12 -> 20 (66.67 %) Code Size: 216 -> 224 (3.70 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 40 -> 32 (-20.00 %) VGPRS: 16 -> 12 (-25.00 %) Code Size: 368 -> 280 (-23.91 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave *** BY UNIT *** Max Increase: SGPRS: 32 -> 48 (50.00 %) VGPRS: 28 -> 36 (28.57 %) Code Size: 39320 -> 40132 (2.07 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 72 -> 64 (-11.11 %) VGPRS: 48 -> 40 (-16.67 %) Code Size: 6272 -> 5852 (-6.70 %) bytes LDS: 0 -> 0 (0.00 %) blocks 
Scratch: 0 -> 0 (0.00 %) bytes per wave Reviewed-by: Marek Olšák --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 1690194..91c56a3 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1579,6 +1579,7 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) LLVMAddLICMPass(gallivm->passmgr); LLVMAddAggressiveDCEPass(gallivm->passmgr); LLVMAddCFGSimplificationPass(gallivm->passmgr); + LLVMAddInstructionCombiningPass(gallivm->passmgr); /* Run the pass */ LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn); From tstellar at kemper.freedesktop.org Tue Apr 14 14:45:23 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Tue, 14 Apr 2015 07:45:23 -0700 (PDT) Subject: Mesa (master): radeon/llvm: Improve codegen for KILL_IF Message-ID: <20150414144523.95E42761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e0994e0f97a2078735f0b5e86cbad9f74c565c05 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e0994e0f97a2078735f0b5e86cbad9f74c565c05 Author: Tom Stellard Date: Mon Apr 13 13:25:41 2015 +0000 radeon/llvm: Improve codegen for KILL_IF Rather than emitting one kill instruction per component of KILL_IF's src reg, we now or the components of the src register together and use the result as a condition for just one kill instruction. 
shader-db stats (bonaire): 979 shaders Totals: SGPRS: 34872 -> 34848 (-0.07 %) VGPRS: 20696 -> 20676 (-0.10 %) Code Size: 749032 -> 748452 (-0.08 %) bytes LDS: 11 -> 11 (0.00 %) blocks Scratch: 12288 -> 12288 (0.00 %) bytes per wave Totals from affected shaders: SGPRS: 1184 -> 1160 (-2.03 %) VGPRS: 600 -> 580 (-3.33 %) Code Size: 13200 -> 12620 (-4.39 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Increases: SGPRS: 2 (0.00 %) VGPRS: 0 (0.00 %) Code Size: 0 (0.00 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) Decreases: SGPRS: 5 (0.01 %) VGPRS: 5 (0.01 %) Code Size: 25 (0.03 %) LDS: 0 (0.00 %) Scratch: 0 (0.00 %) *** BY PERCENTAGE *** Max Increase: SGPRS: 32 -> 40 (25.00 %) VGPRS: 0 -> 0 (0.00 %) Code Size: 0 -> 0 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 32 -> 24 (-25.00 %) VGPRS: 16 -> 12 (-25.00 %) Code Size: 116 -> 96 (-17.24 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave *** BY UNIT *** Max Increase: SGPRS: 64 -> 72 (12.50 %) VGPRS: 0 -> 0 (0.00 %) Code Size: 0 -> 0 (0.00 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Max Decrease: SGPRS: 32 -> 24 (-25.00 %) VGPRS: 16 -> 12 (-25.00 %) Code Size: 424 -> 356 (-16.04 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 0 -> 0 (0.00 %) bytes per wave Reviewed-by: Marek Olšák --- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 29 ++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 91c56a3..18afbcb 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -637,6 +637,34 @@ static void uif_emit( if_cond_emit(action, bld_base, emit_data, cond); } +static void kill_if_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct 
tgsi_full_instruction * inst = emit_data->inst; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + unsigned i; + LLVMValueRef conds[TGSI_NUM_CHANNELS]; + + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); + conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value, + bld_base->base.zero, ""); + } + + /* Or the conditions together */ + for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { + conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], ""); + } + + emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context); + emit_data->arg_count = 1; + emit_data->args[0] = LLVMBuildSelect(builder, conds[0], + lp_build_const_float(gallivm, -1.0f), + bld_base->base.zero, ""); +} + static void kil_emit( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1467,6 +1495,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; + bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args; bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill"; bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic; From idr at kemper.freedesktop.org Tue Apr 14 15:36:35 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Tue, 14 Apr 2015 08:36:35 -0700 (PDT) Subject: Mesa (master): i965/fs: Always invert predicate of SEL with swapped arguments Message-ID: <20150414153635.91B02761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 05a1d84491eabf56564488ba0b0b0b8e91fd4309 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=05a1d84491eabf56564488ba0b0b0b8e91fd4309 Author: Ian Romanick Date: Mon Mar 23 12:03:56 2015 -0700 i965/fs: Always invert predicate 
of SEL with swapped arguments Commit b616164 added an optimization of b2f generation of a comparison. It also included an extra optimization of one of the comparison values is a constant of zero. The trick was that some value was known to be zero, so that value could be used in the SEL instruction instead of potentially loading 0.0 into a register. This change switched the order of the arguments to the SEL, and, for some unknown reason, I thought that the predicate should therefore only be inverted for the == case. Clearly, it should always be inverted. Fixes piglit fs-notEqual-of-expression.shader_test and fs-equal-of-expression.shader_test. v2: Don't do the "register already has zero" optimization for the '== 0' case. In that case, the register does not have zero when we want to produce a zero result. Signed-off-by: Ian Romanick Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89722 Reviewed-by: Kenneth Graunke [v1] Tested-by: Lu Hua --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 06337c9..0049b2d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -502,15 +502,15 @@ fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir) * and(16) g4<1>D g2<8,8,1>D 1D * and(16) m6<1>D -g4<8,8,1>D 0x3f800000UD * - * When the comparison is either == 0.0 or != 0.0 using the knowledge that - * the true (or false) case already results in zero would allow better code - * generation by possibly avoiding a load-immediate instruction. + * When the comparison is != 0.0 using the knowledge that the false case + * already results in zero would allow better code generation by possibly + * avoiding a load-immediate instruction. 
*/ ir_expression *cmp = ir->operands[0]->as_expression(); if (cmp == NULL) return false; - if (cmp->operation == ir_binop_equal || cmp->operation == ir_binop_nequal) { + if (cmp->operation == ir_binop_nequal) { for (unsigned i = 0; i < 2; i++) { ir_constant *c = cmp->operands[i]->as_constant(); if (c == NULL || !c->is_zero()) @@ -538,7 +538,7 @@ fs_visitor::try_emit_b2f_of_comparison(ir_expression *ir) fs_inst *inst = emit(SEL(this->result, op[i ^ 1], fs_reg(1.0f))); inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = cmp->operation == ir_binop_equal; + inst->predicate_inverse = true; return true; } } From nroberts at kemper.freedesktop.org Tue Apr 14 18:24:25 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Tue, 14 Apr 2015 11:24:25 -0700 (PDT) Subject: Mesa (master): i965/skl: Use an exec size of 8 to initialise the message header Message-ID: <20150414182425.56B4F761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 07c571a39fa12c3db1c638302de7aed67844609b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=07c571a39fa12c3db1c638302de7aed67844609b Author: Neil Roberts Date: Fri Apr 10 17:20:21 2015 +0100 i965/skl: Use an exec size of 8 to initialise the message header Commit e93566a15c61c33faa changed the message header code needed to make Skylake use SIMD4x2 so that it uses a register with width 4 instead of 8 as the source register in the send message. However it also changed the width for the dest in the MOV instruction which is used to initialise the header register with the values from g0. The width of the destination is used to determine the exec size in brw_set_dest so this would end up making the MOV have an exec size of 4. I think this would end up leaving the top half of the register uninitialised. The top half of the header has meaningful values so this probably isn't a good idea. This patch just casts the dest register for the MOV instruction back to a vec8 to fix it. 
It doesn't cause any changes to a Piglit run. Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 2743297..7c00020 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1046,7 +1046,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, src, retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_MOV(p, get_element_ud(src, 2), diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 9714785..e4addf7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1051,7 +1051,7 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, src, retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_MOV(p, get_element_ud(src, 2), From mattst88 at kemper.freedesktop.org Tue Apr 14 19:16:27 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:16:27 -0700 (PDT) Subject: Mesa (master): i965/fs: Correct mistake in determining whether a MUL is negated. 
Message-ID: <20150414191627.2203D761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3ca17e75e4fe129511a4dcad47e139485beef880 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3ca17e75e4fe129511a4dcad47e139485beef880 Author: Matt Turner Date: Mon Apr 13 11:29:14 2015 -0700 i965/fs: Correct mistake in determining whether a MUL is negated. a * b is equivalent to -a * -b, and the previous code was failing at that. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89961 Reviewed-by: Anuj Phogat Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index f2c4098..c1d0616 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -145,7 +145,7 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) xs[1].fixed_hw_reg.dw1.f = xs1_imm; ys[1].fixed_hw_reg.dw1.f = ys1_imm; - *negate = (xs0_negate + xs1_negate) != (ys0_negate + ys1_negate); + *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); return ret; } else if (!a->is_commutative()) { bool match = true; From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:47 -0700 (PDT) Subject: Mesa (master): tnl: replace __FUNCTION__ with __func__ Message-ID: <20150414192647.E4237761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f0e693efb393f1135702c80a64f8733adc922db1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f0e693efb393f1135702c80a64f8733adc922db1 Author: Marius Predut Date: Fri Apr 3 16:11:57 2015 +0300 tnl: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. The patch was verified with Microsoft Visual studio 2013 redistributable package(RTM version number: 18.0.21005.1) Next MSVC versions intends to support __func__. 
No functional changes. Acked-by: Matt Turner Reviewed-by: Anuj Phogat Signed-off-by: Marius Predut --- src/mesa/tnl/t_draw.c | 2 +- src/mesa/tnl/t_vb_lighttmp.h | 8 ++++---- src/mesa/tnl/t_vertex_generic.c | 2 +- src/mesa/tnl_dd/t_dd_dmatmp.h | 34 +++++++++++++++++----------------- src/mesa/tnl_dd/t_dd_dmatmp2.h | 22 +++++++++++----------- src/mesa/tnl_dd/t_dd_triemit.h | 8 ++++---- src/mesa/tnl_dd/t_dd_tritmp.h | 2 +- src/mesa/tnl_dd/t_dd_unfilled.h | 2 +- 8 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 60265d6..6adf1dc 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -448,7 +448,7 @@ void _tnl_draw_prims(struct gl_context *ctx, if (0) { - printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + printf("%s %d..%d\n", __func__, min_index, max_index); for (i = 0; i < nr_prims; i++) printf("prim %d: %s start %d count %d\n", i, _mesa_lookup_enum_by_nr(prim[i].mode), diff --git a/src/mesa/tnl/t_vb_lighttmp.h b/src/mesa/tnl/t_vb_lighttmp.h index 57f569b..f8786ac 100644 --- a/src/mesa/tnl/t_vb_lighttmp.h +++ b/src/mesa/tnl/t_vb_lighttmp.h @@ -68,7 +68,7 @@ static void TAG(light_rgba_spec)( struct gl_context *ctx, const GLuint nr = VB->Count; #ifdef TRACE - fprintf(stderr, "%s\n", __FUNCTION__ ); + fprintf(stderr, "%s\n", __func__ ); #endif VB->AttribPtr[_TNL_ATTRIB_COLOR0] = &store->LitColor[0]; @@ -249,7 +249,7 @@ static void TAG(light_rgba)( struct gl_context *ctx, const GLuint nr = VB->Count; #ifdef TRACE - fprintf(stderr, "%s\n", __FUNCTION__ ); + fprintf(stderr, "%s\n", __func__ ); #endif VB->AttribPtr[_TNL_ATTRIB_COLOR0] = &store->LitColor[0]; @@ -429,7 +429,7 @@ static void TAG(light_fast_rgba_single)( struct gl_context *ctx, #endif #ifdef TRACE - fprintf(stderr, "%s\n", __FUNCTION__ ); + fprintf(stderr, "%s\n", __func__ ); #endif (void) input; /* doesn't refer to Eye or Obj */ @@ -533,7 +533,7 @@ static void TAG(light_fast_rgba)( struct gl_context *ctx, const struct gl_light 
*light; #ifdef TRACE - fprintf(stderr, "%s %d\n", __FUNCTION__, nr ); + fprintf(stderr, "%s %d\n", __func__, nr ); #endif (void) input; diff --git a/src/mesa/tnl/t_vertex_generic.c b/src/mesa/tnl/t_vertex_generic.c index 2858922..2a25a96 100644 --- a/src/mesa/tnl/t_vertex_generic.c +++ b/src/mesa/tnl/t_vertex_generic.c @@ -36,7 +36,7 @@ #if 0 -#define DEBUG_INSERT printf("%s\n", __FUNCTION__) +#define DEBUG_INSERT printf("%s\n", __func__) #else #define DEBUG_INSERT #endif diff --git a/src/mesa/tnl_dd/t_dd_dmatmp.h b/src/mesa/tnl_dd/t_dd_dmatmp.h index 52ea2bf..667e2a6 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp.h @@ -128,7 +128,7 @@ static void TAG(render_points_verts)( struct gl_context *ctx, } } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -163,7 +163,7 @@ static void TAG(render_lines_verts)( struct gl_context *ctx, } } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -195,7 +195,7 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx, FLUSH(); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -261,7 +261,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx, FLUSH(); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -331,7 +331,7 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx, FLUSH(); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -370,7 +370,7 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx, /* Could write code to emit these as indexed vertices (for the * g400, for 
instance). */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -409,7 +409,7 @@ static void TAG(render_poly_verts)( struct gl_context *ctx, else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) { TAG(render_tri_fan_verts)( ctx, start, count, flags ); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -500,7 +500,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx, /* Vertices won't fit in a single buffer or elts not * available - should never happen. */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -534,7 +534,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx, FLUSH(); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -644,7 +644,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, else { /* Vertices won't fit in a single buffer, should never happen. */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -705,7 +705,7 @@ static void TAG(render_points_elts)( struct gl_context *ctx, currentsz = dmasz; } } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -743,7 +743,7 @@ static void TAG(render_lines_elts)( struct gl_context *ctx, currentsz = dmasz; } } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -777,7 +777,7 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx, } else { /* TODO: Try to emit as indexed lines. 
*/ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -845,7 +845,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx, FLUSH(); } else { /* TODO: Try to emit as indexed lines */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -922,7 +922,7 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx, } } else { /* TODO: try to emit as indexed triangles */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -959,7 +959,7 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx, } } else { /* TODO: try to emit as indexed triangles */ - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } @@ -998,7 +998,7 @@ static void TAG(render_poly_elts)( struct gl_context *ctx, } else if (HAVE_TRI_FANS && ctx->Light.ShadeModel == GL_SMOOTH) { TAG(render_tri_fan_verts)( ctx, start, count, flags ); } else { - fprintf(stderr, "%s - cannot draw primitive\n", __FUNCTION__); + fprintf(stderr, "%s - cannot draw primitive\n", __func__); return; } } diff --git a/src/mesa/tnl_dd/t_dd_dmatmp2.h b/src/mesa/tnl_dd/t_dd_dmatmp2.h index 7c6f136..96c5b47 100644 --- a/src/mesa/tnl_dd/t_dd_dmatmp2.h +++ b/src/mesa/tnl_dd/t_dd_dmatmp2.h @@ -121,7 +121,7 @@ static void TAG(render_points_verts)( struct gl_context *ctx, { if (start < count) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); EMIT_PRIM( ctx, GL_POINTS, HW_POINTS, start, count ); } } @@ -132,7 +132,7 @@ static void TAG(render_lines_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); count -= (count-start) & 
1; if (start+1 >= count) @@ -156,7 +156,7 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (start+1 >= count) return; @@ -204,7 +204,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx, { LOCAL_VARS; GLuint j, nr; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (flags & PRIM_BEGIN) { j = start; @@ -292,7 +292,7 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); count -= (count-start)%3; @@ -313,7 +313,7 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (start + 2 >= count) return; @@ -358,7 +358,7 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (start+2 >= count) return; @@ -401,7 +401,7 @@ static void TAG(render_poly_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (start+2 >= count) return; @@ -415,7 +415,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); count -= (count-start) & 1; @@ -466,7 +466,7 @@ static void TAG(render_quads_verts)( struct gl_context *ctx, GLuint flags ) { LOCAL_VARS; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); count -= (count-start)%4; if (start+3 >= count) @@ -642,7 +642,7 @@ static void TAG(render_line_loop_elts)( struct 
gl_context *ctx, GLuint j, nr; ELT_TYPE *dest; - if (0) fprintf(stderr, "%s\n", __FUNCTION__); + if (0) fprintf(stderr, "%s\n", __func__); if (flags & PRIM_BEGIN) j = start; diff --git a/src/mesa/tnl_dd/t_dd_triemit.h b/src/mesa/tnl_dd/t_dd_triemit.h index 082e83f..093501f 100644 --- a/src/mesa/tnl_dd/t_dd_triemit.h +++ b/src/mesa/tnl_dd/t_dd_triemit.h @@ -39,7 +39,7 @@ static __inline void TAG(quad)( CTX_ARG, GLuint j; if (DO_DEBUG_VERTS) { - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); PRINT_VERTEX(v0); PRINT_VERTEX(v1); PRINT_VERTEX(v2); @@ -63,7 +63,7 @@ static __inline void TAG(quad)( CTX_ARG, GLuint j; if (DO_DEBUG_VERTS) { - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); PRINT_VERTEX(v0); PRINT_VERTEX(v1); PRINT_VERTEX(v2); @@ -90,7 +90,7 @@ static __inline void TAG(triangle)( CTX_ARG, GLuint j; if (DO_DEBUG_VERTS) { - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); PRINT_VERTEX(v0); PRINT_VERTEX(v1); PRINT_VERTEX(v2); @@ -139,7 +139,7 @@ static void TAG(fast_clipped_poly)( struct gl_context *ctx, const GLuint *elts, GLuint i,j; if (DO_DEBUG_VERTS) { - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); PRINT_VERTEX(VERT(elts[0])); PRINT_VERTEX(VERT(elts[1])); } diff --git a/src/mesa/tnl_dd/t_dd_tritmp.h b/src/mesa/tnl_dd/t_dd_tritmp.h index 6fe2c82..2176f1f 100644 --- a/src/mesa/tnl_dd/t_dd_tritmp.h +++ b/src/mesa/tnl_dd/t_dd_tritmp.h @@ -121,7 +121,7 @@ static void TAG(triangle)( struct gl_context *ctx, GLuint e0, GLuint e1, GLuint GLuint facing = 0; LOCAL_VARS(3); -/* fprintf(stderr, "%s\n", __FUNCTION__); */ +/* fprintf(stderr, "%s\n", __func__); */ v[0] = (VERTEX *)GET_VERTEX(e0); v[1] = (VERTEX *)GET_VERTEX(e1); diff --git a/src/mesa/tnl_dd/t_dd_unfilled.h b/src/mesa/tnl_dd/t_dd_unfilled.h index 6be0e7f..82190c0 100644 --- a/src/mesa/tnl_dd/t_dd_unfilled.h +++ b/src/mesa/tnl_dd/t_dd_unfilled.h @@ -59,7 +59,7 @@ static void TAG(unfilled_tri)( 
struct gl_context *ctx, } } -/* fprintf(stderr, "%s %s %d %d %d\n", __FUNCTION__, */ +/* fprintf(stderr, "%s %s %d %d %d\n", __func__, */ /* _mesa_lookup_enum_by_nr( mode ), */ /* ef[e0], ef[e1], ef[e2]); */ From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): swrast: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.0478F761ED@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d02942cc779f97b499c80879db60aa469f651b49 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d02942cc779f97b499c80879db60aa469f651b49 Author: Marius Predut Date: Tue Apr 7 22:03:52 2015 +0300 swrast: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. The patch was verified with Microsoft Visual studio 2013 redistributable package(RTM version number: 18.0.21005.1) Next MSVC versions intends to support __func__. No functional changes. 
Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/mesa/drivers/dri/swrast/swrast.c | 2 +- src/mesa/drivers/dri/swrast/swrast_priv.h | 4 ++-- src/mesa/swrast/s_linetemp.h | 4 ++-- src/mesa/swrast/s_span.c | 2 +- src/mesa/swrast/s_tritemp.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index fb29078..d1bb721 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -326,7 +326,7 @@ choose_pixel_format(const struct gl_config *v) && v->blueMask == 0xc0) return PF_R3G3B2; - _mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ ); + _mesa_problem( NULL, "unexpected format in %s", __func__ ); return 0; } diff --git a/src/mesa/drivers/dri/swrast/swrast_priv.h b/src/mesa/drivers/dri/swrast/swrast_priv.h index 1f3a48f..a6ab535 100644 --- a/src/mesa/drivers/dri/swrast/swrast_priv.h +++ b/src/mesa/drivers/dri/swrast/swrast_priv.h @@ -41,13 +41,13 @@ #define DEBUG_SPAN 0 #if DEBUG_CORE -#define TRACE printf("--> %s\n", __FUNCTION__) +#define TRACE printf("--> %s\n", __func__) #else #define TRACE #endif #if DEBUG_SPAN -#define TRACE_SPAN printf("--> %s\n", __FUNCTION__) +#define TRACE_SPAN printf("--> %s\n", __func__) #else #define TRACE_SPAN #endif diff --git a/src/mesa/swrast/s_linetemp.h b/src/mesa/swrast/s_linetemp.h index 352c884..035a1e6 100644 --- a/src/mesa/swrast/s_linetemp.h +++ b/src/mesa/swrast/s_linetemp.h @@ -106,7 +106,7 @@ NAME( struct gl_context *ctx, const SWvertex *vert0, const SWvertex *vert1 ) } /* - printf("%s():\n", __FUNCTION__); + printf("%s():\n", __func__); printf(" (%f, %f, %f) -> (%f, %f, %f)\n", vert0->attrib[VARYING_SLOT_POS][0], vert0->attrib[VARYING_SLOT_POS][1], @@ -154,7 +154,7 @@ NAME( struct gl_context *ctx, const SWvertex *vert0, const SWvertex *vert1 ) return; /* - printf("%s %d,%d %g %g %g %g %g %g %g %g\n", __FUNCTION__, dx, dy, + printf("%s %d,%d %g %g %g %g %g %g %g %g\n", __func__, dx, 
dy, vert0->attrib[VARYING_SLOT_COL1][0], vert0->attrib[VARYING_SLOT_COL1][1], vert0->attrib[VARYING_SLOT_COL1][2], diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index 0a30d10..3db10e1 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -1143,7 +1143,7 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span) struct gl_framebuffer *fb = ctx->DrawBuffer; /* - printf("%s() interp 0x%x array 0x%x\n", __FUNCTION__, + printf("%s() interp 0x%x array 0x%x\n", __func__, span->interpMask, span->arrayMask); */ diff --git a/src/mesa/swrast/s_tritemp.h b/src/mesa/swrast/s_tritemp.h index 3cd1b44..fddbbfd 100644 --- a/src/mesa/swrast/s_tritemp.h +++ b/src/mesa/swrast/s_tritemp.h @@ -156,7 +156,7 @@ static void NAME(struct gl_context *ctx, const SWvertex *v0, #endif /* - printf("%s()\n", __FUNCTION__); + printf("%s()\n", __func__); printf(" %g, %g, %g\n", v0->attrib[VARYING_SLOT_POS][0], v0->attrib[VARYING_SLOT_POS][1], From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): state_tracker: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.13E7C761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 50cb780f7f90ffc4018f41502a472604bf1677c3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=50cb780f7f90ffc4018f41502a472604bf1677c3 Author: Marius Predut Date: Tue Apr 7 22:04:38 2015 +0300 state_tracker: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. The patch was verified with Microsoft Visual studio 2013 redistributable package(RTM version number: 18.0.21005.1) Next MSVC versions intends to support __func__. No functional changes. 
Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/mesa/state_tracker/st_atom.c | 2 +- src/mesa/state_tracker/st_atom_constbuf.c | 2 +- src/mesa/state_tracker/st_cb_clear.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 18 +++++++++--------- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 2 +- src/mesa/state_tracker/st_program.c | 2 +- src/mesa/state_tracker/st_texture.c | 6 +++--- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c index f0fe11f..428f2d9 100644 --- a/src/mesa/state_tracker/st_atom.c +++ b/src/mesa/state_tracker/st_atom.c @@ -183,7 +183,7 @@ void st_validate_state( struct st_context *st ) if (state->st == 0) return; - /*printf("%s %x/%x\n", __FUNCTION__, state->mesa, state->st);*/ + /*printf("%s %x/%x\n", __func__, state->mesa, state->st);*/ #ifdef DEBUG if (1) { diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 7984bf7..a54e0d9 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -92,7 +92,7 @@ void st_upload_constants( struct st_context *st, if (ST_DEBUG & DEBUG_CONSTANTS) { debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", - __FUNCTION__, shader_type, params->NumParameters, + __func__, shader_type, params->NumParameters, params->StateFlags); _mesa_print_parameter_list(params); } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index dd81a62..f10e906 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -247,7 +247,7 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers) util_framebuffer_get_num_layers(&st->state.framebuffer); /* - printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, + printf("%s %s%s%s %f,%f %f,%f\n", __func__, color ? "color, " : "", depth ? "depth, " : "", stencil ? 
"stencil" : "", diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 353f80d..bdf236e 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -123,7 +123,7 @@ gl_target_to_pipe(GLenum target) static struct gl_texture_image * st_NewTextureImage(struct gl_context * ctx) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); (void) ctx; return (struct gl_texture_image *) ST_CALLOC_STRUCT(st_texture_image); } @@ -144,7 +144,7 @@ st_NewTextureObject(struct gl_context * ctx, GLuint name, GLenum target) { struct st_texture_object *obj = ST_CALLOC_STRUCT(st_texture_object); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); _mesa_initialize_texture_object(ctx, &obj->base, name, target); return &obj->base; @@ -172,7 +172,7 @@ st_FreeTextureImageBuffer(struct gl_context *ctx, { struct st_texture_image *stImage = st_texture_image(texImage); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (stImage->pt) { pipe_resource_reference(&stImage->pt, NULL); @@ -405,7 +405,7 @@ guess_and_alloc_texture(struct st_context *st, GLuint ptWidth, ptHeight, ptDepth, ptLayers; enum pipe_format fmt; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); assert(!stObj->pt); @@ -473,7 +473,7 @@ guess_and_alloc_texture(struct st_context *st, stObj->lastLevel = lastLevel; - DBG("%s returning %d\n", __FUNCTION__, (stObj->pt != NULL)); + DBG("%s returning %d\n", __func__, (stObj->pt != NULL)); return stObj->pt != NULL; } @@ -496,7 +496,7 @@ st_AllocTextureImageBuffer(struct gl_context *ctx, GLuint height = texImage->Height; GLuint depth = texImage->Depth; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); assert(!stImage->pt); /* xxx this might be wrong */ @@ -1153,7 +1153,7 @@ st_GetTexImage(struct gl_context * ctx, } if (ST_DEBUG & DEBUG_FALLBACK) - debug_printf("%s: fallback format translation\n", __FUNCTION__); + debug_printf("%s: fallback format translation\n", __func__); dstMesaFormat = 
_mesa_format_from_format_and_type(format, type); dstStride = _mesa_image_row_stride(&ctx->Pack, width, format, type); @@ -1239,7 +1239,7 @@ fallback_copy_texsubimage(struct gl_context *ctx, struct pipe_transfer *transfer; if (ST_DEBUG & DEBUG_FALLBACK) - debug_printf("%s: fallback processing\n", __FUNCTION__); + debug_printf("%s: fallback processing\n", __func__); if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { srcY = strb->Base.Height - srcY - height; @@ -1394,7 +1394,7 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims, texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); if (!strb || !strb->surface || !stImage->pt) { - debug_printf("%s: null strb or stImage\n", __FUNCTION__); + debug_printf("%s: null strb or stImage\n", __func__); return; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 67a4da7..c05a456 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5400,7 +5400,7 @@ out: free(t->immediates); if (t->error) { - debug_printf("%s: translate error flag set\n", __FUNCTION__); + debug_printf("%s: translate error flag set\n", __func__); } free(t); diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 2f10161..98d525c 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -1249,7 +1249,7 @@ out: free(t->constants); if (t->error) { - debug_printf("%s: translate error flag set\n", __FUNCTION__); + debug_printf("%s: translate error flag set\n", __func__); } return ret; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 4cfd817..3b0ac4a 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -403,7 +403,7 @@ st_translate_vertex_program(struct st_context *st, return vpv; fail: - debug_printf("%s: failed to translate Mesa program:\n", __FUNCTION__); + debug_printf("%s: failed 
to translate Mesa program:\n", __func__); _mesa_print_program(&stvp->Base.Base); debug_assert(0); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index de4a6eb..6beb21e 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -74,7 +74,7 @@ st_texture_create(struct st_context *st, if (target == PIPE_TEXTURE_CUBE) assert(layers == 6); - DBG("%s target %d format %s last_level %d\n", __FUNCTION__, + DBG("%s target %d format %s last_level %d\n", __func__, (int) target, util_format_name(format), last_level); assert(format); @@ -250,7 +250,7 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, GLuint level; void *map; - DBG("%s \n", __FUNCTION__); + DBG("%s \n", __func__); if (!stImage->pt) return NULL; @@ -304,7 +304,7 @@ st_texture_image_unmap(struct st_context *st, slice += stObj->base.MinLayer; transfer = &stImage->transfer[slice + stImage->base.Face].transfer; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); pipe_transfer_unmap(pipe, *transfer); *transfer = NULL; From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): i965: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.53868761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 28d9e90428282a5e0a6aa31ad858a5cf514d1264 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=28d9e90428282a5e0a6aa31ad858a5cf514d1264 Author: Marius Predut Date: Tue Apr 7 22:05:28 2015 +0300 i965: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. No functional changes. 
Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/mesa/drivers/dri/common/utils.c | 2 +- src/mesa/drivers/dri/i965/brw_blorp.cpp | 2 +- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 2 +- src/mesa/drivers/dri/i965/brw_context.c | 4 +-- src/mesa/drivers/dri/i965/brw_draw_upload.c | 2 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 4 +-- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen6_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/gen8_surface_state.c | 2 +- src/mesa/drivers/dri/i965/intel_blit.c | 8 +++--- src/mesa/drivers/dri/i965/intel_fbo.c | 8 +++--- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 30 ++++++++++---------- src/mesa/drivers/dri/i965/intel_pixel_bitmap.c | 2 +- src/mesa/drivers/dri/i965/intel_pixel_copy.c | 6 ++-- src/mesa/drivers/dri/i965/intel_pixel_draw.c | 14 ++++----- src/mesa/drivers/dri/i965/intel_pixel_read.c | 8 +++--- src/mesa/drivers/dri/i965/intel_screen.c | 4 +-- src/mesa/drivers/dri/i965/intel_tex.c | 10 +++---- src/mesa/drivers/dri/i965/intel_tex_copy.c | 4 +-- src/mesa/drivers/dri/i965/intel_tex_image.c | 16 +++++------ src/mesa/drivers/dri/i965/intel_tex_subimage.c | 6 ++-- .../dri/i965/test_vec4_register_coalesce.cpp | 2 +- 24 files changed, 72 insertions(+), 72 deletions(-) diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index bb22107..70d34e8 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -227,7 +227,7 @@ driCreateConfigs(mesa_format format, break; default: fprintf(stderr, "[%s:%u] Unknown framebuffer type %s (%d).\n", - __FUNCTION__, __LINE__, + __func__, __LINE__, _mesa_get_format_name(format), format); return NULL; } diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 78ac58e..131e155 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp 
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -194,7 +194,7 @@ intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, } DBG("%s %s to mt %p level %d layer %d\n", - __FUNCTION__, opname, mt, level, layer); + __func__, opname, mt, level, layer); if (brw->gen >= 8) { gen8_hiz_exec(brw, mt, level, layer, op); diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 644cb41..d25e201 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -78,7 +78,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw, DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)" "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n", - __FUNCTION__, + __func__, src_mt->num_samples, _mesa_get_format_name(src_mt->format), src_mt, src_level, src_layer, src_x0, src_y0, src_x1, src_y1, dst_mt->num_samples, _mesa_get_format_name(dst_mt->format), dst_mt, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a4884ed..c7e1e81 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -725,7 +725,7 @@ brwCreateContext(gl_api api, struct brw_context *brw = rzalloc(NULL, struct brw_context); if (!brw) { - fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__); + fprintf(stderr, "%s: failed to alloc context\n", __func__); *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; return false; } @@ -781,7 +781,7 @@ brwCreateContext(gl_api api, if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) { *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__); + fprintf(stderr, "%s: failed to init mesa context\n", __func__); intelDestroyContext(driContextPriv); return false; } diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b1af0d7..320e40e 100644 --- 
a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -413,7 +413,7 @@ brw_prepare_vertices(struct brw_context *brw) } if (0) - fprintf(stderr, "%s %d..%d\n", __FUNCTION__, min_index, max_index); + fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 89508e4..606740e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -374,7 +374,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) struct brw_cache_item *c, *next; GLuint i; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -422,7 +422,7 @@ static void brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (brw->has_llc) drm_intel_bo_unmap(cache->bo); diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 7a1e09d..75b409c 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -459,7 +459,7 @@ brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt) } break; } - DBG("%s: %dx%dx%d\n", __FUNCTION__, + DBG("%s: %dx%dx%d\n", __func__, mt->total_width, mt->total_height, mt->cpp); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c9dac5b..161d140 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -669,7 +669,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { _mesa_problem(ctx, "%s: 
renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(rb_format)); + __func__, _mesa_get_format_name(rb_format)); } surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c index 080e0f3..fadc353 100644 --- a/src/mesa/drivers/dri/i965/gen6_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c @@ -74,7 +74,7 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(rb_format)); + __func__, _mesa_get_format_name(rb_format)); } switch (gl_target) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 18bcb8a..10567f3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -468,7 +468,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(rb_format)); + __func__, _mesa_get_format_name(rb_format)); } switch (gl_target) { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index ba59b05..011c685 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -369,7 +369,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n", - __FUNCTION__, _mesa_get_format_name(rb_format)); + 
__func__, _mesa_get_format_name(rb_format)); } if (mt->mcs_mt) { diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 9500bd7..4993f60 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -178,7 +178,7 @@ intel_miptree_blit(struct brw_context *brw, (dst_format != MESA_FORMAT_B8G8R8A8_UNORM && dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) { perf_debug("%s: Can't use hardware blitter from %s to %s, " - "falling back.\n", __FUNCTION__, + "falling back.\n", __func__, _mesa_get_format_name(src_format), _mesa_get_format_name(dst_format)); return false; @@ -330,7 +330,7 @@ intelEmitCopyBlit(struct brw_context *brw, intel_batchbuffer_require_space(brw, length * 4, BLT_RING); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, + __func__, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); @@ -456,7 +456,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, return true; DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __FUNCTION__, + __func__, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); unsigned xy_setup_blt_length = brw->gen >= 8 ? 
10 : 8; @@ -581,7 +581,7 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, cpp = mt->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __FUNCTION__, mt->bo, pitch, x, y, width, height); + __func__, mt->bo, pitch, x, y, width, height); BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 8a398f7..aebed72 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -190,7 +190,7 @@ intel_map_renderbuffer(struct gl_context *ctx, } DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%"PRIdPTR"\n", - __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format), + __func__, rb->Name, _mesa_get_format_name(rb->Format), x, y, w, h, map, stride); *out_map = map; @@ -214,7 +214,7 @@ intel_unmap_renderbuffer(struct gl_context *ctx, struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt; - DBG("%s: rb %d (%s)\n", __FUNCTION__, + DBG("%s: rb %d (%s)\n", __func__, rb->Name, _mesa_get_format_name(rb->Format)); if (srb->Buffer) { @@ -309,7 +309,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend intel_miptree_release(&irb->mt); - DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__, + DBG("%s: %s: %s (%dx%d)\n", __func__, _mesa_lookup_enum_by_nr(internalFormat), _mesa_get_format_name(rb->Format), width, height); @@ -662,7 +662,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; int i; - DBG("%s() on fb %p (%s)\n", __FUNCTION__, + DBG("%s() on fb %p (%s)\n", __func__, fb, (fb == ctx->DrawBuffer ? "drawbuffer" : (fb == ctx->ReadBuffer ? 
"readbuffer" : "other buffer"))); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 0424a87..9e311f06 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -253,7 +253,7 @@ intel_miptree_create_layout(struct brw_context *brw, if (!mt) return NULL; - DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d slices %d <-- %p\n", __func__, _mesa_lookup_enum_by_nr(target), _mesa_get_format_name(format), first_level, last_level, depth0, mt); @@ -885,7 +885,7 @@ intel_miptree_reference(struct intel_mipmap_tree **dst, if (src) { src->refcount++; - DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); + DBG("%s %p refcount now %d\n", __func__, src, src->refcount); } *dst = src; @@ -898,11 +898,11 @@ intel_miptree_release(struct intel_mipmap_tree **mt) if (!*mt) return; - DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1); + DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1); if (--(*mt)->refcount <= 0) { GLuint i; - DBG("%s deleting %p\n", __FUNCTION__, *mt); + DBG("%s deleting %p\n", __func__, *mt); drm_intel_bo_unreference((*mt)->bo); intel_miptree_release(&(*mt)->stencil_mt); @@ -1018,7 +1018,7 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt, mt->level[level].level_x = x; mt->level[level].level_y = y; - DBG("%s level %d, depth %d, offset %d,%d\n", __FUNCTION__, + DBG("%s level %d, depth %d, offset %d,%d\n", __func__, level, d, x, y); assert(mt->level[level].slice == NULL); @@ -1043,7 +1043,7 @@ intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, mt->level[level].slice[img].y_offset = mt->level[level].level_y + y; DBG("%s level %d img %d pos %d,%d\n", - __FUNCTION__, level, img, + __func__, level, img, mt->level[level].slice[img].x_offset, mt->level[level].slice[img].y_offset); } @@ -2093,7 +2093,7 @@ 
intel_miptree_map_gtt(struct brw_context *brw, } DBG("%s: %d,%d %dx%d from mt %p (%s) " - "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __FUNCTION__, + "%"PRIiPTR",%"PRIiPTR" = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), x, y, map->ptr, map->stride); @@ -2146,7 +2146,7 @@ intel_miptree_map_blit(struct brw_context *brw, map->ptr = intel_miptree_map_raw(brw, map->mt); - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), level, slice, map->ptr, map->stride); @@ -2196,7 +2196,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw, assert(map->mode & GL_MAP_READ_BIT); assert(!(map->mode & GL_MAP_WRITE_BIT)); - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), level, slice, map->ptr, map->stride); @@ -2291,11 +2291,11 @@ intel_miptree_map_s8(struct brw_context *brw, intel_miptree_unmap_raw(brw, mt); - DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, map->x + image_x, map->y + image_y, map->ptr, map->stride); } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, map->ptr, map->stride); } @@ -2453,13 +2453,13 @@ intel_miptree_map_depthstencil(struct brw_context *brw, intel_miptree_unmap_raw(brw, z_mt); DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", - __FUNCTION__, + __func__, map->x, map->y, map->w, map->h, z_mt, map->x + z_image_x, map->y + z_image_y, s_mt, map->x + s_image_x, map->y + s_image_y, map->ptr, map->stride); } else { - DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + 
DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, map->ptr, map->stride); } @@ -2513,7 +2513,7 @@ intel_miptree_unmap_depthstencil(struct brw_context *brw, intel_miptree_unmap_raw(brw, z_mt); DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", - __FUNCTION__, + __func__, map->x, map->y, map->w, map->h, z_mt, _mesa_get_format_name(z_mt->format), map->x + z_image_x, map->y + z_image_y, @@ -2692,7 +2692,7 @@ intel_miptree_unmap(struct brw_context *brw, if (!map) return; - DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__, + DBG("%s: mt %p (%s) level %d slice %d\n", __func__, mt, _mesa_get_format_name(mt->format), level, slice); if (mt->format == MESA_FORMAT_S_UINT8) { diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c index 1b3f952..224dc65 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c @@ -120,7 +120,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, GLuint count = 0; DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + __func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); if (invert) { first = h-1; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c index d928e1d..ce053ed 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c @@ -181,7 +181,7 @@ do_blit_copypixels(struct gl_context * ctx, width, height, (ctx->Color.ColorLogicOpEnabled ? 
ctx->Color.LogicOp : GL_COPY))) { - DBG("%s: blit failure\n", __FUNCTION__); + DBG("%s: blit failure\n", __func__); return false; } @@ -190,7 +190,7 @@ do_blit_copypixels(struct gl_context * ctx, out: - DBG("%s: success\n", __FUNCTION__); + DBG("%s: success\n", __func__); return true; } @@ -201,7 +201,7 @@ intelCopyPixels(struct gl_context * ctx, GLsizei width, GLsizei height, GLint destx, GLint desty, GLenum type) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (!_mesa_check_conditional_render(ctx)) return; diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c index 055ab42..4ecefc8 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c @@ -61,13 +61,13 @@ do_blit_drawpixels(struct gl_context * ctx, GLuint src_offset; drm_intel_bo *src_buffer; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (!intel_check_blit_fragment_ops(ctx, false)) return false; if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { - DBG("%s: fallback due to MRT\n", __FUNCTION__); + DBG("%s: fallback due to MRT\n", __func__); return false; } @@ -78,13 +78,13 @@ do_blit_drawpixels(struct gl_context * ctx, if (!_mesa_format_matches_format_and_type(irb->mt->format, format, type, false)) { - DBG("%s: bad format for blit\n", __FUNCTION__); + DBG("%s: bad format for blit\n", __func__); return false; } if (unpack->SwapBytes || unpack->LsbFirst || unpack->SkipPixels || unpack->SkipRows) { - DBG("%s: bad packing params\n", __FUNCTION__); + DBG("%s: bad packing params\n", __func__); return false; } @@ -122,7 +122,7 @@ do_blit_drawpixels(struct gl_context * ctx, irb->mt, irb->mt_level, irb->mt_layer, x, y, _mesa_is_winsys_fbo(ctx->DrawBuffer), width, height, GL_COPY)) { - DBG("%s: blit failed\n", __FUNCTION__); + DBG("%s: blit failed\n", __func__); intel_miptree_release(&pbo_mt); return false; } @@ -132,7 +132,7 @@ do_blit_drawpixels(struct gl_context * ctx, if 
(ctx->Query.CurrentOcclusionObject) ctx->Query.CurrentOcclusionObject->Result += width * height; - DBG("%s: success\n", __FUNCTION__); + DBG("%s: success\n", __func__); return true; } @@ -162,7 +162,7 @@ intelDrawPixels(struct gl_context * ctx, return; } - perf_debug("%s: fallback to generic code in PBO case\n", __FUNCTION__); + perf_debug("%s: fallback to generic code in PBO case\n", __func__); } _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 0972121..d3ca38b 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -164,7 +164,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, error = brw_bo_map(brw, bo, false /* write enable */, "miptree"); if (error) { - DBG("%s: failed to map bo\n", __FUNCTION__); + DBG("%s: failed to map bo\n", __func__); return false; } @@ -191,7 +191,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " "mesa_format=0x%x tiling=%d " "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __FUNCTION__, xoffset, yoffset, width, height, + __func__, xoffset, yoffset, width, height, format, type, rb->Format, irb->mt->tiling, pack->Alignment, pack->RowLength, pack->SkipPixels, pack->SkipRows); @@ -222,14 +222,14 @@ intelReadPixels(struct gl_context * ctx, struct brw_context *brw = brw_context(ctx); bool dirty; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (_mesa_is_bufferobj(pack->BufferObj)) { if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1, format, type, pixels, pack)) return; - perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); + perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c index cb9710f..5a9207a 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -706,7 +706,7 @@ intel_create_image_from_fds(__DRIscreen *screen, if (f->nplanes == 1) { image->offset = image->offsets[0]; - intel_image_warn_if_unaligned(image, __FUNCTION__); + intel_image_warn_if_unaligned(image, __func__); } return image; @@ -797,7 +797,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) image->pitch = stride; image->offset = offset; - intel_image_warn_if_unaligned(image, __FUNCTION__); + intel_image_warn_if_unaligned(image, __func__); return image; } diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 3335fd1..777a682 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -15,7 +15,7 @@ static struct gl_texture_image * intelNewTextureImage(struct gl_context * ctx) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); (void) ctx; return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image); } @@ -35,7 +35,7 @@ intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target) (void) ctx; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (obj == NULL) return NULL; @@ -88,7 +88,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, intel_miptree_match_image(intel_texobj->mt, image)) { intel_miptree_reference(&intel_image->mt, intel_texobj->mt); DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n", - __FUNCTION__, texobj, image->Level, + __func__, texobj, image->Level, image->Width, image->Height, image->Depth, intel_texobj->mt); } else { intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj, @@ -103,7 +103,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, intel_miptree_reference(&intel_texobj->mt, intel_image->mt); DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", - __FUNCTION__, 
texobj, image->Level, + __func__, texobj, image->Level, image->Width, image->Height, image->Depth, intel_image->mt); } @@ -185,7 +185,7 @@ intel_free_texture_image_buffer(struct gl_context * ctx, { struct intel_texture_image *intelImage = intel_texture_image(texImage); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); intel_miptree_release(&intelImage->mt); diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c index fc31031..4d8c82e 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_copy.c +++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c @@ -71,7 +71,7 @@ intel_copy_texsubimage(struct brw_context *brw, if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_PERF)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", - __FUNCTION__, intelImage->mt, irb, internalFormat); + __func__, intelImage->mt, irb, internalFormat); return false; } @@ -121,7 +121,7 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims, } /* Finally, fall back to meta. This will likely be slow. */ - perf_debug("%s - fallback to swrast\n", __FUNCTION__); + perf_debug("%s - fallback to swrast\n", __func__); _mesa_meta_CopyTexSubImage(ctx, dims, texImage, xoffset, yoffset, slice, rb, x, y, width, height); diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index b70f8de..7952ee5 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -45,7 +45,7 @@ intel_miptree_create_for_teximage(struct brw_context *brw, intel_miptree_get_dimensions_for_image(&intelImage->base.Base, &width, &height, &depth); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); /* Figure out image dimensions at start level. 
*/ for (i = intelImage->base.Base.Level; i > 0; i--) { @@ -98,7 +98,7 @@ intelTexImage(struct gl_context * ctx, bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo); DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __FUNCTION__, _mesa_get_format_name(texImage->TexFormat), + __func__, _mesa_get_format_name(texImage->TexFormat), _mesa_lookup_enum_by_nr(texImage->TexObject->Target), _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); @@ -131,7 +131,7 @@ intelTexImage(struct gl_context * ctx, return; DBG("%s: upload image %dx%dx%d pixels %p\n", - __FUNCTION__, texImage->Width, texImage->Height, texImage->Depth, + __func__, texImage->Width, texImage->Height, texImage->Depth, pixels); _mesa_store_teximage(ctx, dims, texImage, @@ -438,7 +438,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, error = brw_bo_map(brw, bo, false /* write enable */, "miptree"); if (error) { - DBG("%s: failed to map bo\n", __FUNCTION__); + DBG("%s: failed to map bo\n", __func__); return false; } @@ -447,7 +447,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x " "mesa_format=0x%x tiling=%d " "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n", - __FUNCTION__, texImage->Level, xoffset, yoffset, width, height, + __func__, texImage->Level, xoffset, yoffset, width, height, format, type, texImage->TexFormat, image->mt->tiling, packing->Alignment, packing->RowLength, packing->SkipPixels, packing->SkipRows); @@ -480,7 +480,7 @@ intel_get_tex_image(struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); bool ok; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) { if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0, @@ -489,7 +489,7 @@ intel_get_tex_image(struct gl_context *ctx, pixels, &ctx->Pack)) 
return; - perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); + perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0, @@ -501,7 +501,7 @@ intel_get_tex_image(struct gl_context *ctx, _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage); - DBG("%s - DONE\n", __FUNCTION__); + DBG("%s - DONE\n", __func__); } void diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 909ff25..7507f76 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -150,7 +150,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, error = brw_bo_map(brw, bo, true /* write enable */, "miptree"); if (error || bo->virtual == NULL) { - DBG("%s: failed to map bo\n", __FUNCTION__); + DBG("%s: failed to map bo\n", __func__); return false; } @@ -163,7 +163,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, "mesa_format=0x%x tiling=%d " "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d) " "for_glTexImage=%d\n", - __FUNCTION__, texImage->Level, xoffset, yoffset, width, height, + __func__, texImage->Level, xoffset, yoffset, width, height, format, type, texImage->TexFormat, image->mt->tiling, packing->Alignment, packing->RowLength, packing->SkipPixels, packing->SkipRows, for_glTexImage); @@ -205,7 +205,7 @@ intelTexSubImage(struct gl_context * ctx, bool tex_busy = intelImage->mt && drm_intel_bo_busy(intelImage->mt->bo); DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", - __FUNCTION__, _mesa_get_format_name(texImage->TexFormat), + __func__, _mesa_get_format_name(texImage->TexFormat), _mesa_lookup_enum_by_nr(texImage->TexObject->Target), _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp 
b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 0c27162..17bece5 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -29,7 +29,7 @@ using namespace brw; int ret = 0; -#define register_coalesce(v) _register_coalesce(v, __FUNCTION__) +#define register_coalesce(v) _register_coalesce(v, __func__) class register_coalesce_test : public ::testing::Test { virtual void SetUp(); From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:47 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:47 -0700 (PDT) Subject: Mesa (master): vbo: replace __FUNCTION__ with __func__ Message-ID: <20150414192647.ED63F761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e1231159bcd0b0fc9aaebeda0745ee38d13a7282 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1231159bcd0b0fc9aaebeda0745ee38d13a7282 Author: Marius Predut Date: Fri Apr 3 15:02:33 2015 +0300 vbo: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. The patch was verified with the Microsoft Visual Studio 2013 redistributable package (RTM version number: 18.0.21005.1). Upcoming MSVC versions intend to support __func__. No functional changes.
Acked-by: Matt Turner Reviewed-by: Anuj Phogat Signed-off-by: Marius Predut --- src/mesa/vbo/vbo_exec_api.c | 2 +- src/mesa/vbo/vbo_exec_draw.c | 4 ++-- src/mesa/vbo/vbo_rebase.c | 2 +- src/mesa/vbo/vbo_save_api.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 02741c2..859078f 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -439,7 +439,7 @@ do { \ } while (0) -#define ERROR(err) _mesa_error( ctx, err, __FUNCTION__ ) +#define ERROR(err) _mesa_error( ctx, err, __func__ ) #define TAG(x) vbo_##x #include "vbo_attrib_tmp.h" diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 91f2ca4..37b53a8 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -45,7 +45,7 @@ vbo_exec_debug_verts( struct vbo_exec_context *exec ) GLuint i; printf("%s: %u vertices %d primitives, %d vertsize\n", - __FUNCTION__, + __func__, count, exec->vtx.prim_count, exec->vtx.vertex_size); @@ -402,7 +402,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped) } if (0) - printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, + printf("%s %d %d\n", __func__, exec->vtx.prim_count, exec->vtx.vert_count); vbo_context(ctx)->draw_prims( ctx, diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index b06df4a..c3c4b64 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -142,7 +142,7 @@ void vbo_rebase_prims( struct gl_context *ctx, assert(min_index != 0); if (0) - printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + printf("%s %d..%d\n", __func__, min_index, max_index); /* XXX this path is disabled for now. 
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index fd9a5de..5927bee 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -763,7 +763,7 @@ _save_reset_vertex(struct gl_context *ctx) -#define ERROR(err) _mesa_compile_error(ctx, err, __FUNCTION__); +#define ERROR(err) _mesa_compile_error(ctx, err, __func__); /* Only one size for each attribute may be active at once. Eg. if From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): glx: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.2C8BB761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fc57222f6097da737d578f081075a027377ba399 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc57222f6097da737d578f081075a027377ba399 Author: Marius Predut Date: Tue Apr 7 22:06:01 2015 +0300 glx: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. No functional changes. Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/glx/apple/apple_glx_log.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glx/apple/apple_glx_log.h b/src/glx/apple/apple_glx_log.h index 4b1c531..b1a5538 100644 --- a/src/glx/apple/apple_glx_log.h +++ b/src/glx/apple/apple_glx_log.h @@ -39,14 +39,14 @@ __printflike(5, 6) void _apple_glx_log(int level, const char *file, const char *function, int line, const char *fmt, ...); #define apple_glx_log(l, f, args ...) 
\ - _apple_glx_log(l, __FILE__, __FUNCTION__, __LINE__, f, ## args) + _apple_glx_log(l, __FILE__, __func__, __LINE__, f, ## args) __printflike(5, 0) void _apple_glx_vlog(int level, const char *file, const char *function, int line, const char *fmt, va_list v); #define apple_glx_vlog(l, f, v) \ - _apple_glx_vlog(l, __FILE__, __FUNCTION__, __LINE__, f, v) + _apple_glx_vlog(l, __FILE__, __func__, __LINE__, f, v) /* This is just here to help the transition. * TODO: Replace calls to apple_glx_diagnostic From mattst88 at kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): main: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.239FE761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6f4d9418b41f650630e725e0a842de9bb1ad746f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f4d9418b41f650630e725e0a842de9bb1ad746f Author: Marius Predut Date: Tue Apr 7 22:04:58 2015 +0300 main: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. The patch was verified with the Microsoft Visual Studio 2013 redistributable package (RTM version number: 18.0.21005.1). Upcoming MSVC versions intend to support __func__. No functional changes.
Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/mesa/main/atifragshader.c | 4 ++-- src/mesa/main/ffvertex_prog.c | 6 +++--- src/mesa/main/format_unpack.py | 4 ++-- src/mesa/main/glformats.c | 2 +- src/mesa/main/mtypes.h | 2 +- src/mesa/main/state.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/mesa/main/atifragshader.c b/src/mesa/main/atifragshader.c index 9d967b9..9fc3552 100644 --- a/src/mesa/main/atifragshader.c +++ b/src/mesa/main/atifragshader.c @@ -476,7 +476,7 @@ _mesa_PassTexCoordATI(GLuint dst, GLuint coord, GLenum swizzle) curI->swizzle = swizzle; #if MESA_DEBUG_ATI_FS - _mesa_debug(ctx, "%s(%s, %s, %s)\n", __FUNCTION__, + _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__, _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(coord), _mesa_lookup_enum_by_nr(swizzle)); #endif @@ -549,7 +549,7 @@ _mesa_SampleMapATI(GLuint dst, GLuint interp, GLenum swizzle) curI->swizzle = swizzle; #if MESA_DEBUG_ATI_FS - _mesa_debug(ctx, "%s(%s, %s, %s)\n", __FUNCTION__, + _mesa_debug(ctx, "%s(%s, %s, %s)\n", __func__, _mesa_lookup_enum_by_nr(dst), _mesa_lookup_enum_by_nr(interp), _mesa_lookup_enum_by_nr(swizzle)); #endif diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 395b00e..edf7e33 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -619,13 +619,13 @@ static void emit_op3fn(struct tnl_program *p, #define emit_op3(p, op, dst, mask, src0, src1, src2) \ - emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) + emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) #define emit_op2(p, op, dst, mask, src0, src1) \ - emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) + emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) #define emit_op1(p, op, dst, mask, src0) \ - emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) + emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) static 
struct ureg make_temp( struct tnl_program *p, struct ureg reg ) diff --git a/src/mesa/main/format_unpack.py b/src/mesa/main/format_unpack.py index 5928c20..0ae73b8 100644 --- a/src/mesa/main/format_unpack.py +++ b/src/mesa/main/format_unpack.py @@ -332,7 +332,7 @@ _mesa_unpack_rgba_row(mesa_format format, GLuint n, unpack_float_ycbcr_rev(src, dst, n); break; default: - _mesa_problem(NULL, "%s: bad format %s", __FUNCTION__, + _mesa_problem(NULL, "%s: bad format %s", __func__, _mesa_get_format_name(format)); return; } @@ -401,7 +401,7 @@ _mesa_unpack_uint_rgba_row(mesa_format format, GLuint n, break; %endfor default: - _mesa_problem(NULL, "%s: bad format %s", __FUNCTION__, + _mesa_problem(NULL, "%s: bad format %s", __func__, _mesa_get_format_name(format)); return; } diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c index 4e05229..8ced579 100644 --- a/src/mesa/main/glformats.c +++ b/src/mesa/main/glformats.c @@ -1393,7 +1393,7 @@ _mesa_base_format_has_channel(GLenum base_format, GLenum pname) return GL_FALSE; default: _mesa_warning(NULL, "%s: Unexpected channel token 0x%x\n", - __FUNCTION__, pname); + __func__, pname); return GL_FALSE; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 5d726b4..56d3b7e 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -4365,7 +4365,7 @@ struct gl_context #ifdef DEBUG extern int MESA_VERBOSE; extern int MESA_DEBUG_FLAGS; -# define MESA_FUNCTION __FUNCTION__ +# define MESA_FUNCTION __func__ #else # define MESA_VERBOSE 0 # define MESA_DEBUG_FLAGS 0 diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 6dc14b2..cc84c61 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -482,7 +482,7 @@ _mesa_set_varying_vp_inputs( struct gl_context *ctx, ctx->FragmentProgram._TexEnvProgram) { ctx->NewState |= _NEW_VARYING_VP_INPUTS; } - /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/ + /*printf("%s %x\n", __func__, varying_inputs);*/ } } From mattst88 at 
kemper.freedesktop.org Tue Apr 14 19:26:48 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 14 Apr 2015 12:26:48 -0700 (PDT) Subject: Mesa (master): i915: replace __FUNCTION__ with __func__ Message-ID: <20150414192648.3E119761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 139e6c7c4a9c59be5f4b3f431ac393cc097326ac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=139e6c7c4a9c59be5f4b3f431ac393cc097326ac Author: Marius Predut Date: Tue Apr 7 22:05:44 2015 +0300 i915: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. No functional changes. Acked-by: Matt Turner Signed-off-by: Marius Predut --- src/mesa/drivers/dri/i915/i830_state.c | 44 ++++++++++++------------ src/mesa/drivers/dri/i915/i830_texblend.c | 4 +-- src/mesa/drivers/dri/i915/i830_texstate.c | 2 +- src/mesa/drivers/dri/i915/i915_program.c | 8 ++--- src/mesa/drivers/dri/i915/i915_state.c | 26 +++++++------- src/mesa/drivers/dri/i915/i915_tex_layout.c | 4 +-- src/mesa/drivers/dri/i915/i915_texstate.c | 2 +- src/mesa/drivers/dri/i915/i915_vtbl.c | 2 +- src/mesa/drivers/dri/i915/intel_blit.c | 10 +++--- src/mesa/drivers/dri/i915/intel_clear.c | 2 +- src/mesa/drivers/dri/i915/intel_context.c | 2 +- src/mesa/drivers/dri/i915/intel_fbo.c | 8 ++--- src/mesa/drivers/dri/i915/intel_mipmap_tree.c | 18 +++++----- src/mesa/drivers/dri/i915/intel_pixel_bitmap.c | 2 +- src/mesa/drivers/dri/i915/intel_pixel_copy.c | 6 ++-- src/mesa/drivers/dri/i915/intel_pixel_read.c | 12 +++---- src/mesa/drivers/dri/i915/intel_regions.c | 8 ++--- src/mesa/drivers/dri/i915/intel_render.c | 2 +- src/mesa/drivers/dri/i915/intel_state.c | 6 ++-- src/mesa/drivers/dri/i915/intel_tex.c | 10 +++--- src/mesa/drivers/dri/i915/intel_tex_copy.c | 4 +-- src/mesa/drivers/dri/i915/intel_tex_image.c | 18 +++++----- src/mesa/drivers/dri/i915/intel_tex_subimage.c | 2 +- src/mesa/drivers/dri/i915/intel_tris.c | 12 +++---- 24 files changed, 107 insertions(+), 107 deletions(-) diff 
--git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 3e379f3..13adf56 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -56,7 +56,7 @@ i830StencilFuncSeparate(struct gl_context * ctx, GLenum face, GLenum func, GLint mask = mask & 0xff; - DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__, + DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __func__, _mesa_lookup_enum_by_nr(func), ref, mask); @@ -77,7 +77,7 @@ i830StencilMaskSeparate(struct gl_context * ctx, GLenum face, GLuint mask) { struct i830_context *i830 = i830_context(ctx); - DBG("%s : mask 0x%x\n", __FUNCTION__, mask); + DBG("%s : mask 0x%x\n", __func__, mask); mask = mask & 0xff; @@ -94,7 +94,7 @@ i830StencilOpSeparate(struct gl_context * ctx, GLenum face, GLenum fail, GLenum struct i830_context *i830 = i830_context(ctx); int fop, dfop, dpop; - DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__, + DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __func__, _mesa_lookup_enum_by_nr(fail), _mesa_lookup_enum_by_nr(zfail), _mesa_lookup_enum_by_nr(zpass)); @@ -261,7 +261,7 @@ i830BlendColor(struct gl_context * ctx, const GLfloat color[4]) struct i830_context *i830 = i830_context(ctx); GLubyte r, g, b, a; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]); @@ -315,7 +315,7 @@ i830_set_blend_state(struct gl_context * ctx) break; default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); + __func__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } @@ -343,7 +343,7 @@ i830_set_blend_state(struct gl_context * ctx) break; default: fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); + __func__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -378,7 +378,7 @@ 
i830_set_blend_state(struct gl_context * ctx) if (0) { fprintf(stderr, "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n", - __FUNCTION__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1], + __func__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1], i830->state.Ctx[I830_CTXREG_IALPHAB], (ctx->Color.BlendEnabled) ? "en" : "dis"); } @@ -388,7 +388,7 @@ i830_set_blend_state(struct gl_context * ctx) static void i830BlendEquationSeparate(struct gl_context * ctx, GLenum modeRGB, GLenum modeA) { - DBG("%s -> %s, %s\n", __FUNCTION__, + DBG("%s -> %s, %s\n", __func__, _mesa_lookup_enum_by_nr(modeRGB), _mesa_lookup_enum_by_nr(modeA)); @@ -402,7 +402,7 @@ static void i830BlendFuncSeparate(struct gl_context * ctx, GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA) { - DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__, + DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __func__, _mesa_lookup_enum_by_nr(sfactorRGB), _mesa_lookup_enum_by_nr(dfactorRGB), _mesa_lookup_enum_by_nr(sfactorA), @@ -423,7 +423,7 @@ i830DepthFunc(struct gl_context * ctx, GLenum func) struct i830_context *i830 = i830_context(ctx); int test = intel_translate_compare_func(func); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK; @@ -436,7 +436,7 @@ i830DepthMask(struct gl_context * ctx, GLboolean flag) { struct i830_context *i830 = i830_context(ctx); - DBG("%s flag (%d)\n", __FUNCTION__, flag); + DBG("%s flag (%d)\n", __func__, flag); if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits) flag = false; @@ -535,7 +535,7 @@ i830Scissor(struct gl_context * ctx) if (!ctx->DrawBuffer) return; - DBG("%s %d,%d %dx%d\n", __FUNCTION__, + DBG("%s %d,%d %dx%d\n", __func__, ctx->Scissor.ScissorArray[0].X, ctx->Scissor.ScissorArray[0].Y, ctx->Scissor.ScissorArray[0].Width, ctx->Scissor.ScissorArray[0].Height); @@ -546,7 +546,7 @@ i830Scissor(struct gl_context * ctx) x2 = 
ctx->Scissor.ScissorArray[0].X + ctx->Scissor.ScissorArray[0].Width - 1; y2 = y1 + ctx->Scissor.ScissorArray[0].Height - 1; - DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (inverted)\n", __func__, x1, x2, y1, y2); } else { /* FBO - not inverted @@ -557,7 +557,7 @@ i830Scissor(struct gl_context * ctx) + ctx->Scissor.ScissorArray[0].Width - 1; y2 = ctx->Scissor.ScissorArray[0].Y + ctx->Scissor.ScissorArray[0].Height - 1; - DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (not inverted)\n", __func__, x1, x2, y1, y2); } x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); @@ -565,7 +565,7 @@ i830Scissor(struct gl_context * ctx) x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); - DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (clamped)\n", __func__, x1, x2, y1, y2); I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS); i830->state.Buffer[I830_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff); @@ -578,7 +578,7 @@ i830LogicOp(struct gl_context * ctx, GLenum opcode) struct i830_context *i830 = i830_context(ctx); int tmp = intel_translate_logic_op(opcode); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_STATE4] &= ~LOGICOP_MASK; @@ -593,7 +593,7 @@ i830CullFaceFrontFace(struct gl_context * ctx, GLenum unused) struct i830_context *i830 = i830_context(ctx); GLuint mode; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (!ctx->Polygon.CullFlag) { mode = CULLMODE_NONE; @@ -622,7 +622,7 @@ i830LineWidth(struct gl_context * ctx, GLfloat widthf) int width; int state5; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); width = (int) (widthf * 2); width = CLAMP(width, 1, 15); @@ -642,7 +642,7 @@ i830PointSize(struct gl_context * ctx, GLfloat size) struct i830_context *i830 = i830_context(ctx); GLint point_size = (int) size; - DBG("%s\n", 
__FUNCTION__); + DBG("%s\n", __func__); point_size = CLAMP(point_size, 1, 256); I830_STATECHANGE(i830, I830_UPLOAD_CTX); @@ -663,7 +663,7 @@ i830ColorMask(struct gl_context * ctx, struct i830_context *i830 = i830_context(ctx); GLuint tmp = 0; - DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a); + DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __func__, r, g, b, a); tmp = ((i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK) | ENABLE_COLOR_MASK | @@ -695,7 +695,7 @@ update_specular(struct gl_context * ctx) static void i830LightModelfv(struct gl_context * ctx, GLenum pname, const GLfloat * param) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) { update_specular(ctx); @@ -738,7 +738,7 @@ i830Fogfv(struct gl_context * ctx, GLenum pname, const GLfloat * param) { struct i830_context *i830 = i830_context(ctx); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (pname == GL_FOG_COLOR) { GLuint color = (((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) | diff --git a/src/mesa/drivers/dri/i915/i830_texblend.c b/src/mesa/drivers/dri/i915/i830_texblend.c index ebfce7f..661e424 100644 --- a/src/mesa/drivers/dri/i915/i830_texblend.c +++ b/src/mesa/drivers/dri/i915/i830_texblend.c @@ -159,7 +159,7 @@ i830SetTexEnvCombine(struct i830_context * i830, }; if (INTEL_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); /* The EXT version of the DOT3 extension does not support the @@ -395,7 +395,7 @@ emit_texblend(struct i830_context *i830, GLuint unit, GLuint blendUnit, if (0) - fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); + fprintf(stderr, "%s unit %d\n", __func__, unit); /* Update i830->state.TexBlend */ diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index 00731e6..83cd733 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -74,7 +74,7 @@ 
translate_texture_format(GLuint mesa_format) case MESA_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); default: - fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + fprintf(stderr, "%s: bad image format %s\n", __func__, _mesa_get_format_name(mesa_format)); abort(); return 0; diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index d1bfcfe..64b0b4d 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -322,7 +322,7 @@ i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0) } } - fprintf(stderr, "%s: out of constants\n", __FUNCTION__); + fprintf(stderr, "%s: out of constants\n", __func__); p->error = 1; return 0; } @@ -359,7 +359,7 @@ i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1) } } - fprintf(stderr, "%s: out of constants\n", __FUNCTION__); + fprintf(stderr, "%s: out of constants\n", __func__); p->error = 1; return 0; } @@ -391,7 +391,7 @@ i915_emit_const4f(struct i915_fragment_program * p, } } - fprintf(stderr, "%s: out of constants\n", __FUNCTION__); + fprintf(stderr, "%s: out of constants\n", __func__); p->error = 1; return 0; } @@ -430,7 +430,7 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values) } } - fprintf(stderr, "%s: out of constants\n", __FUNCTION__); + fprintf(stderr, "%s: out of constants\n", __func__); p->error = 1; return 0; } diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 32e5f98..f12bf8a 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -241,7 +241,7 @@ i915BlendColor(struct gl_context * ctx, const GLfloat color[4]) GLubyte r, g, b, a; GLuint dw; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]); @@ -357,7 +357,7 @@ i915DepthFunc(struct gl_context * ctx, GLenum func) int 
test = intel_translate_compare_func(func); GLuint dw; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); dw = i915->state.Ctx[I915_CTXREG_LIS6]; dw &= ~S6_DEPTH_TEST_FUNC_MASK; @@ -374,7 +374,7 @@ i915DepthMask(struct gl_context * ctx, GLboolean flag) struct i915_context *i915 = I915_CONTEXT(ctx); GLuint dw; - DBG("%s flag (%d)\n", __FUNCTION__, flag); + DBG("%s flag (%d)\n", __func__, flag); if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits) flag = false; @@ -501,7 +501,7 @@ i915Scissor(struct gl_context * ctx) if (!ctx->DrawBuffer) return; - DBG("%s %d,%d %dx%d\n", __FUNCTION__, + DBG("%s %d,%d %dx%d\n", __func__, ctx->Scissor.ScissorArray[0].X, ctx->Scissor.ScissorArray[0].Y, ctx->Scissor.ScissorArray[0].Width, ctx->Scissor.ScissorArray[0].Height); @@ -512,7 +512,7 @@ i915Scissor(struct gl_context * ctx) x2 = ctx->Scissor.ScissorArray[0].X + ctx->Scissor.ScissorArray[0].Width - 1; y2 = y1 + ctx->Scissor.ScissorArray[0].Height - 1; - DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (inverted)\n", __func__, x1, x2, y1, y2); } else { /* FBO - not inverted @@ -523,7 +523,7 @@ i915Scissor(struct gl_context * ctx) + ctx->Scissor.ScissorArray[0].Width - 1; y2 = ctx->Scissor.ScissorArray[0].Y + ctx->Scissor.ScissorArray[0].Height - 1; - DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (not inverted)\n", __func__, x1, x2, y1, y2); } x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1); @@ -531,7 +531,7 @@ i915Scissor(struct gl_context * ctx) x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1); y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1); - DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2); + DBG("%s %d..%d,%d..%d (clamped)\n", __func__, x1, x2, y1, y2); I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS); i915->state.Buffer[I915_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff); @@ -544,7 +544,7 @@ i915LogicOp(struct gl_context * ctx, GLenum opcode) struct i915_context *i915 = 
I915_CONTEXT(ctx); int tmp = intel_translate_logic_op(opcode); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); I915_STATECHANGE(i915, I915_UPLOAD_CTX); i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK; @@ -559,7 +559,7 @@ i915CullFaceFrontFace(struct gl_context * ctx, GLenum unused) struct i915_context *i915 = I915_CONTEXT(ctx); GLuint mode, dw; - DBG("%s %d\n", __FUNCTION__, + DBG("%s %d\n", __func__, ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0); if (!ctx->Polygon.CullFlag) { @@ -595,7 +595,7 @@ i915LineWidth(struct gl_context * ctx, GLfloat widthf) int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK; int width; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); width = (int) (widthf * 2); width = CLAMP(width, 1, 0xf); @@ -614,7 +614,7 @@ i915PointSize(struct gl_context * ctx, GLfloat size) int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK; GLint point_size = (int) round(size); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); point_size = CLAMP(point_size, 1, 255); lis4 |= point_size << S4_POINT_WIDTH_SHIFT; @@ -697,7 +697,7 @@ i915ColorMask(struct gl_context * ctx, struct i915_context *i915 = I915_CONTEXT(ctx); GLuint tmp = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK; - DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, + DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __func__, r, g, b, a); if (!r) @@ -726,7 +726,7 @@ update_specular(struct gl_context * ctx) static void i915LightModelfv(struct gl_context * ctx, GLenum pname, const GLfloat * param) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) { update_specular(ctx); diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index d416d46..e76ccb0 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -244,7 +244,7 @@ i915_miptree_layout(struct intel_mipmap_tree * mt) break; } - DBG("%s: %dx%dx%d\n", 
__FUNCTION__, + DBG("%s: %dx%dx%d\n", __func__, mt->total_width, mt->total_height, mt->cpp); } @@ -476,6 +476,6 @@ i945_miptree_layout(struct intel_mipmap_tree * mt) break; } - DBG("%s: %dx%dx%d\n", __FUNCTION__, + DBG("%s: %dx%dx%d\n", __func__, mt->total_width, mt->total_height, mt->cpp); } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 000ab6e..aef5ff9 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -97,7 +97,7 @@ translate_texture_format(mesa_format mesa_format, GLenum DepthMode) else return (MAPSURF_32BIT | MT_32BIT_x8L24); default: - fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__, + fprintf(stderr, "%s: bad image format %s\n", __func__, _mesa_get_format_name(mesa_format)); abort(); return 0; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 706e0c3..97bf81e 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -356,7 +356,7 @@ i915_emit_state(struct intel_context *intel) assert(get_dirty(state) == 0); if (INTEL_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty); + fprintf(stderr, "%s dirty: %x\n", __func__, dirty); if (dirty & I915_UPLOAD_INVARIENT) { if (INTEL_DEBUG & DEBUG_STATE) diff --git a/src/mesa/drivers/dri/i915/intel_blit.c b/src/mesa/drivers/dri/i915/intel_blit.c index 9a68625..279db28 100644 --- a/src/mesa/drivers/dri/i915/intel_blit.c +++ b/src/mesa/drivers/dri/i915/intel_blit.c @@ -134,7 +134,7 @@ intel_miptree_blit(struct intel_context *intel, (dst_format != MESA_FORMAT_B8G8R8A8_UNORM && dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) { perf_debug("%s: Can't use hardware blitter from %s to %s, " - "falling back.\n", __FUNCTION__, + "falling back.\n", __func__, _mesa_get_format_name(src_format), _mesa_get_format_name(dst_format)); return false; @@ -266,7 +266,7 @@ intelEmitCopyBlit(struct intel_context *intel, 
intel_batchbuffer_require_space(intel, 8 * 4); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, + __func__, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); @@ -421,7 +421,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) cpp = region->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __FUNCTION__, + __func__, region->bo, pitch, x1, y1, x2 - x1, y2 - y1); @@ -542,7 +542,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, return true; DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", - __FUNCTION__, + __func__, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); intel_batchbuffer_require_space(intel, @@ -658,7 +658,7 @@ intel_miptree_set_alpha_to_one(struct intel_context *intel, cpp = region->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", - __FUNCTION__, region->bo, pitch, x, y, width, height); + __func__, region->bo, pitch, x, y, width, height); BR13 = br13_for_cpp(cpp) | 0xf0 << 16; CMD = XY_COLOR_BLT_CMD; diff --git a/src/mesa/drivers/dri/i915/intel_clear.c b/src/mesa/drivers/dri/i915/intel_clear.c index 5374e19..8cf7c14 100644 --- a/src/mesa/drivers/dri/i915/intel_clear.c +++ b/src/mesa/drivers/dri/i915/intel_clear.c @@ -94,7 +94,7 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } if (0) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); /* Get SW clears out of the way: Anything without an intel_renderbuffer */ for (i = 0; i < BUFFER_COUNT; i++) { diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c index 12a1d2b..5618dcd 100644 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -422,7 +422,7 @@ intelInitContext(struct intel_context *intel, if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx, functions)) { *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; - printf("%s: failed to 
init mesa context\n", __FUNCTION__); + printf("%s: failed to init mesa context\n", __func__); return false; } diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index 8ce1dbf..24c3180 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -135,7 +135,7 @@ intel_map_renderbuffer(struct gl_context *ctx, } DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%d\n", - __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format), + __func__, rb->Name, _mesa_get_format_name(rb->Format), x, y, w, h, map, stride); *out_map = map; @@ -153,7 +153,7 @@ intel_unmap_renderbuffer(struct gl_context *ctx, struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - DBG("%s: rb %d (%s)\n", __FUNCTION__, + DBG("%s: rb %d (%s)\n", __func__, rb->Name, _mesa_get_format_name(rb->Format)); if (srb->Buffer) { @@ -215,7 +215,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend intel_miptree_release(&irb->mt); - DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__, + DBG("%s: %s: %s (%dx%d)\n", __func__, _mesa_lookup_enum_by_nr(internalFormat), _mesa_get_format_name(rb->Format), width, height); @@ -568,7 +568,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; int i; - DBG("%s() on fb %p (%s)\n", __FUNCTION__, + DBG("%s() on fb %p (%s)\n", __func__, fb, (fb == ctx->DrawBuffer ? "drawbuffer" : (fb == ctx->ReadBuffer ? 
"readbuffer" : "other buffer"))); diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c index b9a629f..e56b985 100644 --- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c @@ -80,7 +80,7 @@ intel_miptree_create_layout(struct intel_context *intel, if (!mt) return NULL; - DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d <-- %p\n", __func__, _mesa_lookup_enum_by_nr(target), _mesa_get_format_name(format), first_level, last_level, mt); @@ -381,7 +381,7 @@ intel_miptree_reference(struct intel_mipmap_tree **dst, if (src) { src->refcount++; - DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); + DBG("%s %p refcount now %d\n", __func__, src, src->refcount); } *dst = src; @@ -394,11 +394,11 @@ intel_miptree_release(struct intel_mipmap_tree **mt) if (!*mt) return; - DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1); + DBG("%s %p refcount will be %d\n", __func__, *mt, (*mt)->refcount - 1); if (--(*mt)->refcount <= 0) { GLuint i; - DBG("%s deleting %p\n", __FUNCTION__, *mt); + DBG("%s deleting %p\n", __func__, *mt); intel_region_release(&((*mt)->region)); @@ -499,7 +499,7 @@ intel_miptree_set_level_info(struct intel_mipmap_tree *mt, mt->level[level].level_x = x; mt->level[level].level_y = y; - DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__, + DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __func__, level, w, h, d, x, y); assert(mt->level[level].slice == NULL); @@ -524,7 +524,7 @@ intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, mt->level[level].slice[img].y_offset = mt->level[level].level_y + y; DBG("%s level %d img %d pos %d,%d\n", - __FUNCTION__, level, img, + __func__, level, img, mt->level[level].slice[img].x_offset, mt->level[level].slice[img].y_offset); } @@ -765,7 +765,7 @@ intel_miptree_map_gtt(struct intel_context *intel, map->ptr = base + y 
* map->stride + x * mt->cpp; } - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), x, y, map->ptr, map->stride); @@ -811,7 +811,7 @@ intel_miptree_map_blit(struct intel_context *intel, intel_batchbuffer_flush(intel); map->ptr = intel_miptree_map_raw(intel, map->mt); - DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, mt, _mesa_get_format_name(mt->format), level, slice, map->ptr, map->stride); @@ -943,7 +943,7 @@ intel_miptree_unmap(struct intel_context *intel, if (!map) return; - DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__, + DBG("%s: mt %p (%s) level %d slice %d\n", __func__, mt, _mesa_get_format_name(mt->format), level, slice); if (map->mt) { diff --git a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c index 70cf413..a41b692 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c @@ -119,7 +119,7 @@ static GLuint get_bitmap_rect(GLsizei width, GLsizei height, GLuint count = 0; DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", - __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + __func__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); if (invert) { first = h-1; diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c index e7b5353..a718556 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c @@ -176,7 +176,7 @@ do_blit_copypixels(struct gl_context * ctx, width, height, (ctx->Color.ColorLogicOpEnabled ? 
ctx->Color.LogicOp : GL_COPY))) { - DBG("%s: blit failure\n", __FUNCTION__); + DBG("%s: blit failure\n", __func__); return false; } @@ -186,7 +186,7 @@ do_blit_copypixels(struct gl_context * ctx, out: intel_check_front_buffer_rendering(intel); - DBG("%s: success\n", __FUNCTION__); + DBG("%s: success\n", __func__); return true; } @@ -197,7 +197,7 @@ intelCopyPixels(struct gl_context * ctx, GLsizei width, GLsizei height, GLint destx, GLint desty, GLenum type) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (!_mesa_check_conditional_render(ctx)) return; diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index ce3ad25..149e921 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -82,7 +82,7 @@ do_blit_readpixels(struct gl_context * ctx, GLint dst_x, dst_y; GLuint dirty; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); assert(_mesa_is_bufferobj(pack->BufferObj)); @@ -92,12 +92,12 @@ do_blit_readpixels(struct gl_context * ctx, if (ctx->_ImageTransferState || !_mesa_format_matches_format_and_type(irb->mt->format, format, type, false)) { - DBG("%s - bad format for blit\n", __FUNCTION__); + DBG("%s - bad format for blit\n", __func__); return false; } if (pack->SwapBytes || pack->LsbFirst) { - DBG("%s: bad packing params\n", __FUNCTION__); + DBG("%s: bad packing params\n", __func__); return false; } @@ -148,7 +148,7 @@ do_blit_readpixels(struct gl_context * ctx, intel_miptree_release(&pbo_mt); - DBG("%s - DONE\n", __FUNCTION__); + DBG("%s - DONE\n", __func__); return true; } @@ -164,7 +164,7 @@ intelReadPixels(struct gl_context * ctx, intel_flush_rendering_to_batch(ctx); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (_mesa_is_bufferobj(pack->BufferObj)) { /* Using PBOs, so try the BLT based path. 
*/ @@ -173,7 +173,7 @@ intelReadPixels(struct gl_context * ctx, return; } - perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); + perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__); } /* glReadPixels() wont dirty the front buffer, so reset the dirty diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c index 5768357..c9b776d 100644 --- a/src/mesa/drivers/dri/i915/intel_regions.c +++ b/src/mesa/drivers/dri/i915/intel_regions.c @@ -124,7 +124,7 @@ intel_region_alloc_internal(struct intel_screen *screen, region->bo = buffer; region->tiling = tiling; - _DBG("%s <-- %p\n", __FUNCTION__, region); + _DBG("%s <-- %p\n", __func__, region); return region; } @@ -241,7 +241,7 @@ intel_region_alloc_for_fd(struct intel_screen *screen, void intel_region_reference(struct intel_region **dst, struct intel_region *src) { - _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, + _DBG("%s: %p(%d) -> %p(%d)\n", __func__, *dst, *dst ? (*dst)->refcount : 0, src, src ? 
src->refcount : 0); if (src != *dst) { @@ -260,11 +260,11 @@ intel_region_release(struct intel_region **region_handle) struct intel_region *region = *region_handle; if (region == NULL) { - _DBG("%s NULL\n", __FUNCTION__); + _DBG("%s NULL\n", __func__); return; } - _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + _DBG("%s %p %d\n", __func__, region, region->refcount - 1); assert(region->refcount > 0); region->refcount--; diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index bf48f38..0b0d48e 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -113,7 +113,7 @@ static void intelDmaPrimitive(struct intel_context *intel, GLenum prim) { if (0) - fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); + fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim)); INTEL_FIREVERTICES(intel); intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]); intel_set_prim(intel, hw_prim[prim]); diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c index 5846b58..c951ff7 100644 --- a/src/mesa/drivers/dri/i915/intel_state.c +++ b/src/mesa/drivers/dri/i915/intel_state.c @@ -58,7 +58,7 @@ intel_translate_shadow_compare_func(GLenum func) return COMPAREFUNC_NEVER; } - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + fprintf(stderr, "Unknown value in %s: %x\n", __func__, func); return COMPAREFUNC_NEVER; } @@ -84,7 +84,7 @@ intel_translate_compare_func(GLenum func) return COMPAREFUNC_ALWAYS; } - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + fprintf(stderr, "Unknown value in %s: %x\n", __func__, func); return COMPAREFUNC_ALWAYS; } @@ -149,7 +149,7 @@ intel_translate_blend_factor(GLenum factor) return BLENDFACT_INV_CONST_ALPHA; } - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); + fprintf(stderr, "Unknown value in %s: %x\n", __func__, factor); 
return BLENDFACT_ZERO; } diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c index ce17395..4c48d3b 100644 --- a/src/mesa/drivers/dri/i915/intel_tex.c +++ b/src/mesa/drivers/dri/i915/intel_tex.c @@ -14,7 +14,7 @@ static struct gl_texture_image * intelNewTextureImage(struct gl_context * ctx) { - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); (void) ctx; return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image); } @@ -34,7 +34,7 @@ intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target) (void) ctx; - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (obj == NULL) return NULL; @@ -80,7 +80,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, intel_miptree_match_image(intel_texobj->mt, image)) { intel_miptree_reference(&intel_image->mt, intel_texobj->mt); DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n", - __FUNCTION__, texobj, image->Level, + __func__, texobj, image->Level, image->Width, image->Height, image->Depth, intel_texobj->mt); } else { intel_image->mt = intel_miptree_create_for_teximage(intel, intel_texobj, @@ -95,7 +95,7 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx, intel_miptree_reference(&intel_texobj->mt, intel_image->mt); DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", - __FUNCTION__, texobj, image->Level, + __func__, texobj, image->Level, image->Width, image->Height, image->Depth, intel_image->mt); } @@ -110,7 +110,7 @@ intel_free_texture_image_buffer(struct gl_context * ctx, { struct intel_texture_image *intelImage = intel_texture_image(texImage); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); intel_miptree_release(&intelImage->mt); diff --git a/src/mesa/drivers/dri/i915/intel_tex_copy.c b/src/mesa/drivers/dri/i915/intel_tex_copy.c index 33e4aa8..a5d00af 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_copy.c +++ b/src/mesa/drivers/dri/i915/intel_tex_copy.c @@ -59,7 +59,7 @@ intel_copy_texsubimage(struct 
intel_context *intel, if (!intelImage->mt || !irb || !irb->mt) { if (unlikely(INTEL_DEBUG & DEBUG_PERF)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", - __FUNCTION__, intelImage->mt, irb, internalFormat); + __func__, intelImage->mt, irb, internalFormat); return false; } @@ -97,7 +97,7 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims, } /* Otherwise, fall back to meta. This will likely be slow. */ - perf_debug("%s - fallback to swrast\n", __FUNCTION__); + perf_debug("%s - fallback to swrast\n", __func__); _mesa_meta_CopyTexSubImage(ctx, dims, texImage, xoffset, yoffset, slice, rb, x, y, width, height); diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c index 57674b9..01de966 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_image.c +++ b/src/mesa/drivers/dri/i915/intel_tex_image.c @@ -42,7 +42,7 @@ intel_miptree_create_for_teximage(struct intel_context *intel, intel_miptree_get_dimensions_for_image(&intelImage->base.Base, &width, &height, &depth); - DBG("%s\n", __FUNCTION__); + DBG("%s\n", __func__); if (intelImage->base.Base.Level > intelObj->base.BaseLevel && (width == 1 || @@ -122,28 +122,28 @@ try_pbo_upload(struct gl_context *ctx, if (intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { - DBG("%s: image transfer\n", __FUNCTION__); + DBG("%s: image transfer\n", __func__); return false; } ctx->Driver.AllocTextureImageBuffer(ctx, image); if (!intelImage->mt) { - DBG("%s: no miptree\n", __FUNCTION__); + DBG("%s: no miptree\n", __func__); return false; } if (!_mesa_format_matches_format_and_type(intelImage->mt->format, format, type, false)) { DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n", - __FUNCTION__, _mesa_get_format_name(intelImage->mt->format), + __func__, _mesa_get_format_name(intelImage->mt->format), format, type); return false; } if (image->TexObject->Target == GL_TEXTURE_1D_ARRAY || image->TexObject->Target == GL_TEXTURE_2D_ARRAY) { - DBG("%s: no 
support for array textures\n", __FUNCTION__); + DBG("%s: no support for array textures\n", __func__); return false; } @@ -170,14 +170,14 @@ try_pbo_upload(struct gl_context *ctx, intelImage->mt, image->Level, image->Face, 0, 0, false, image->Width, image->Height, GL_COPY)) { - DBG("%s: blit failed\n", __FUNCTION__); + DBG("%s: blit failed\n", __func__); intel_miptree_release(&pbo_mt); return false; } intel_miptree_release(&pbo_mt); - DBG("%s: success\n", __FUNCTION__); + DBG("%s: success\n", __func__); return true; } @@ -188,7 +188,7 @@ intelTexImage(struct gl_context * ctx, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *unpack) { - DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__, + DBG("%s target %s level %d %dx%dx%d\n", __func__, _mesa_lookup_enum_by_nr(texImage->TexObject->Target), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); @@ -200,7 +200,7 @@ intelTexImage(struct gl_context * ctx, } DBG("%s: upload image %dx%dx%d pixels %p\n", - __FUNCTION__, texImage->Width, texImage->Height, texImage->Depth, + __func__, texImage->Width, texImage->Height, texImage->Depth, pixels); _mesa_store_teximage(ctx, dims, texImage, diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c index 2f46c73..2e02d50 100644 --- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c @@ -71,7 +71,7 @@ intel_blit_texsubimage(struct gl_context * ctx, return false; DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n", - __FUNCTION__, + __func__, _mesa_lookup_enum_by_nr(texImage->TexObject->Target), texImage->Level, xoffset, yoffset, width, height); diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 2c0a785..144f0fc 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -798,7 +798,7 @@ intel_fallback_tri(struct intel_context *intel, 
SWvertex v[3]; if (0) - fprintf(stderr, "\n%s\n", __FUNCTION__); + fprintf(stderr, "\n%s\n", __func__); INTEL_FIREVERTICES(intel); @@ -819,7 +819,7 @@ intel_fallback_line(struct intel_context *intel, SWvertex v[2]; if (0) - fprintf(stderr, "\n%s\n", __FUNCTION__); + fprintf(stderr, "\n%s\n", __func__); INTEL_FIREVERTICES(intel); @@ -838,7 +838,7 @@ intel_fallback_point(struct intel_context *intel, SWvertex v[1]; if (0) - fprintf(stderr, "\n%s\n", __FUNCTION__); + fprintf(stderr, "\n%s\n", __func__); INTEL_FIREVERTICES(intel); @@ -971,7 +971,7 @@ intelChooseRenderState(struct gl_context * ctx) GLuint index = 0; if (INTEL_DEBUG & DEBUG_STATE) - fprintf(stderr, "\n%s\n", __FUNCTION__); + fprintf(stderr, "\n%s\n", __func__); if ((flags & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) || have_wpos) { @@ -1133,7 +1133,7 @@ intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, GLuint hwprim) struct intel_context *intel = intel_context(ctx); if (0) - fprintf(stderr, "%s %s %x\n", __FUNCTION__, + fprintf(stderr, "%s %s %x\n", __func__, _mesa_lookup_enum_by_nr(rprim), hwprim); intel->vtbl.reduced_primitive_state(intel, rprim); @@ -1158,7 +1158,7 @@ intelRenderPrimitive(struct gl_context * ctx, GLenum prim) ctx->Polygon.BackMode != GL_FILL); if (0) - fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim)); + fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr(prim)); /* Let some clipping routines know which primitive they're dealing * with. 
From bwidawsk at kemper.freedesktop.org Tue Apr 14 22:23:01 2015 From: bwidawsk at kemper.freedesktop.org (Ben Widawsky) Date: Tue, 14 Apr 2015 15:23:01 -0700 (PDT) Subject: Mesa (master): i965/fs: Create a has_side_effects for fs_inst Message-ID: <20150414222301.3EF79761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 38707e1478a4b6f4687c583d06fbd68e22900735 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=38707e1478a4b6f4687c583d06fbd68e22900735 Author: Ben Widawsky Date: Tue Mar 31 15:49:42 2015 -0700 i965/fs: Create a has_side_effects for fs_inst When an instruction has a side effect, it impacts the available options when reordering an instruction. As the EOT flag is an implied write to the render target in the FS, it can be considered a side effect. This patch shouldn't actually have any impact on the current code since the EOT flag implies that the opcode is already one with side effects, FS_OPCODE_FB_WRITE. The next patch however will introduce an optimization whereby the EOT flag can occur with an opcode SHADER_OPCODE_TEX, and as that instruction will perform the same implied write to the render target, it cannot be reordered. 
v2: Remove extra whitespace (Matt) Signed-off-by: Ben Widawsky Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++ src/mesa/drivers/dri/i965/brw_ir_fs.h | 1 + 2 files changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8bd8da2..aea1ebb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -531,6 +531,12 @@ fs_inst::can_do_source_mods(struct brw_context *brw) return true; } +bool +fs_inst::has_side_effects() const +{ + return this->eot || backend_instruction::has_side_effects(); +} + void fs_reg::init() { diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 9ef1261..30c19f4 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -225,6 +225,7 @@ public: bool is_partial_write() const; int regs_read(int arg) const; bool can_do_source_mods(struct brw_context *brw); + bool has_side_effects() const; bool reads_flag() const; bool writes_flag() const; From bwidawsk at kemper.freedesktop.org Tue Apr 14 22:23:01 2015 From: bwidawsk at kemper.freedesktop.org (Ben Widawsky) Date: Tue, 14 Apr 2015 15:23:01 -0700 (PDT) Subject: Mesa (master): i965/fs: Only emit FS_OPCODE_PLACEHOLDER_HALT if there are discards Message-ID: <20150414222301.472D4761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6866378cf42c86d03f38616804e6714a932ab70b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6866378cf42c86d03f38616804e6714a932ab70b Author: Ben Widawsky Date: Fri Apr 10 10:04:55 2015 -0700 i965/fs: Only emit FS_OPCODE_PLACEHOLDER_HALT if there are discards Based originally on a patch from Ken in May 2014 of the same title. Things changed enough that I didn't feel comfortable leaving his authorship. v2: Replace fp->UsesKill with wm_prog_data->uses_kill. 
Since Ken took the time to also explain the difference to me, here is his explanation for posterity: "fp->UsesKill indicates that an ARB_fragment_program shader uses the KIL instruction, or that a GLSL shader uses the "discard" instruction (which are analogous). On Gen4-5, we sometimes have to simulate OpenGL's "Alpha Test" feature by emitting shader code that implicitly does a "discard" instruction. In the key setup, we do: /* key->alpha_test_func means simulating alpha testing via discards, * so the shader definitely kills pixels. */ prog_data.uses_kill = fp->program.UsesKill || key->alpha_test_func; Even though the shader may not technically contain a "discard", we need to act as if it does. I've also been trying to move the i965 state setup code to use brw_wm_prog_key for everything, rather than poking at core Mesa's gl_program/gl_fragment_program/gl_shader/gl_shader_program structures. --Ken" Signed-off-by: Ben Widawsky Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index aea1ebb..f04fb59 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1699,6 +1699,8 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) void fs_visitor::emit_discard_jump() { + assert(((brw_wm_prog_data*) this->prog_data)->uses_kill); + /* For performance, after a discard, jump to the end of the * shader if all relevant channels have been discarded.
*/ @@ -3958,7 +3960,8 @@ fs_visitor::run_fs() if (failed) return false; - emit(FS_OPCODE_PLACEHOLDER_HALT); + if (wm_prog_data->uses_kill) + emit(FS_OPCODE_PLACEHOLDER_HALT); if (wm_key->alpha_test_func) emit_alpha_test(); From bwidawsk at kemper.freedesktop.org Tue Apr 14 22:23:01 2015 From: bwidawsk at kemper.freedesktop.org (Ben Widawsky) Date: Tue, 14 Apr 2015 15:23:01 -0700 (PDT) Subject: Mesa (master): i965/fs: Combine tex/fb_write operations (opt) Message-ID: <20150414222301.535F0761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b069f9eafd945a86be633d8fff4e715fc6d7ec2d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b069f9eafd945a86be633d8fff4e715fc6d7ec2d Author: Ben Widawsky Date: Sun Feb 8 13:59:57 2015 -0800 i965/fs: Combine tex/fb_write operations (opt) Certain platforms support the ability to sample from a texture, and write it out to the file RT - thus saving a costly send instruction (note that this is a potential win if one wanted to backport to a tag that didn't have the patch from Topi which removed excess MOVs from LOAD_PAYLOAD - 97caf5fa04dbd2), v2: Modify the algorithm. Instead of iterating in reverse through blocks and insts, since the last block/inst is the only thing which can benefit. Rebased on top of Ken's patch modifying is_last_send v3: Rebased over almost 2 months, and Incorporated feedback from Matt: Some comment typo fixes and rewordings. Whitespace Move the optimization pass outside of the optimize loop v4: Some cosmetic changes requested from Ken. These changes ensured that the optimization function always returned true when an optimization occurred, and false when one did not. This behavior did not exist with the original patch. As a result, having the separate helper function which Matt did not like no longer made sense, and so now I believe everyone should be happy.
Benchmark (n=20) %diff *OglBatch5 -1.4 *OglBatch7 -1.79 OglFillTexMulti 5.57 OglFillTexSingle 1.16 OglShMapPcf 0.05 OglTexFilterAniso 3.01 OglTexFilterTri 1.94 No piglit regressions: (http://otc-gfxtest-01.jf.intel.com:8080/view/dev/job/bwidawsk/112/) [*] I believe my measurements are incorrect for Batch5-7. If I add this new optimization, but never emit the new instruction I see similar results. v5: Remove declaration of combine_tex_header since v4 dropped that function (Ben) Remove check for impossible case of an empty block (Matt) Set dest earlier to avoid extra special-casing in generate_tex (Matt) Signed-off-by: Ben Widawsky Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 90 ++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 2 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +++ 3 files changed, 103 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f04fb59..7cc88ea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2555,6 +2555,94 @@ fs_visitor::opt_algebraic() return progress; } +/** + * Optimize sample messages which are followed by the final RT write. + * + * CHV, and GEN9+ can mark a texturing SEND instruction with EOT to have its + * results sent directly to the framebuffer, bypassing the EU. Recognize the + * final texturing results copied to the framebuffer write payload and modify + * them to write to the framebuffer directly. + */ +bool +fs_visitor::opt_sampler_eot() +{ + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + + if (brw->gen < 9 && !brw->is_cherryview) + return false; + + /* FINISHME: It should be possible to implement this optimization when there + * are multiple drawbuffers. + */ + if (key->nr_color_regions != 1) + return false; + + /* Look for a texturing instruction immediately before the final FB_WRITE. 
*/ + fs_inst *fb_write = (fs_inst *) cfg->blocks[cfg->num_blocks - 1]->end(); + assert(fb_write->eot); + assert(fb_write->opcode == FS_OPCODE_FB_WRITE); + + fs_inst *tex_inst = (fs_inst *) fb_write->prev; + + /* There wasn't one; nothing to do. */ + if (unlikely(tex_inst->is_head_sentinel()) || !tex_inst->is_tex()) + return false; + + /* If there's no header present, we need to munge the LOAD_PAYLOAD as well. + * It's very likely to be the previous instruction. + */ + fs_inst *load_payload = (fs_inst *) tex_inst->prev; + if (load_payload->is_head_sentinel() || + load_payload->opcode != SHADER_OPCODE_LOAD_PAYLOAD) + return false; + + assert(!tex_inst->eot); /* We can't get here twice */ + assert((tex_inst->offset & (0xff << 24)) == 0); + + tex_inst->offset |= fb_write->target << 24; + tex_inst->eot = true; + fb_write->remove(cfg->blocks[cfg->num_blocks - 1]); + + /* If a header is present, marking the eot is sufficient. Otherwise, we need + * to create a new LOAD_PAYLOAD command with the same sources and a space + * saved for the header. Using a new destination register not only makes sure + * we have enough space, but it will make sure the dead code eliminator kills + * the instruction that this will replace. + */ + if (tex_inst->header_present) + return true; + + fs_reg send_header = vgrf(load_payload->sources + 1); + fs_reg *new_sources = + ralloc_array(mem_ctx, fs_reg, load_payload->sources + 1); + + new_sources[0] = fs_reg(); + for (int i = 0; i < load_payload->sources; i++) + new_sources[i+1] = load_payload->src[i]; + + /* The LOAD_PAYLOAD helper seems like the obvious choice here. However, it + * requires a lot of information about the sources to appropriately figure + * out the number of registers needed to be used. Given this stage in our + * optimization, we may not have the appropriate GRFs required by + * LOAD_PAYLOAD at this point (copy propagation). Therefore, we need to + * manually emit the instruction. 
+ */ + fs_inst *new_load_payload = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, + load_payload->exec_size, + send_header, + new_sources, + load_payload->sources + 1); + + new_load_payload->regs_written = load_payload->regs_written + 1; + tex_inst->mlen++; + tex_inst->header_present = true; + tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], new_load_payload); + tex_inst->src[0] = send_header; + tex_inst->dst = reg_null_ud; + + return true; +} + bool fs_visitor::opt_register_renaming() { @@ -3761,6 +3849,8 @@ fs_visitor::optimize() pass_num = 0; + OPT(opt_sampler_eot); + if (OPT(lower_load_payload)) { split_virtual_grfs(); OPT(register_coalesce); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cfdbf55..32063f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -231,6 +231,8 @@ public: bool compute_to_mrf(); bool dead_code_eliminate(); bool remove_duplicate_mrf_writes(); + + bool opt_sampler_eot(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(instruction_scheduler_mode mode); void insert_gen4_send_dependency_workarounds(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 7c00020..b06a947 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -517,6 +517,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src int rlen = 4; uint32_t simd_mode; uint32_t return_format; + bool is_combined_send = inst->eot; switch (dst.type) { case BRW_REGISTER_TYPE_D: @@ -688,6 +689,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src dst = vec16(dst); } + if (is_combined_send) { + assert(brw->gen >= 9 || brw->is_cherryview); + rlen = 0; + } + assert(brw->gen < 7 || !inst->header_present || src.file == BRW_GENERAL_REGISTER_FILE); @@ -793,6 +799,11 @@ 
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src * so has already done marking. */ } + + if (is_combined_send) { + brw_inst_set_eot(brw, brw_last_inst, true); + brw_inst_set_opcode(brw, brw_last_inst, BRW_OPCODE_SENDC); + } } From cwabbott0 at kemper.freedesktop.org Tue Apr 14 23:07:59 2015 From: cwabbott0 at kemper.freedesktop.org (Connor Abbott) Date: Tue, 14 Apr 2015 16:07:59 -0700 (PDT) Subject: Mesa (master): nir/cse: fix bug with comparing non-per-component sources Message-ID: <20150414230759.382B0761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 47a1b4841d39fc358fc5ab67dc129a02419fba8d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=47a1b4841d39fc358fc5ab67dc129a02419fba8d Author: Connor Abbott Date: Tue Apr 14 15:44:24 2015 -0400 nir/cse: fix bug with comparing non-per-component sources We weren't comparing the right number of components when checking swizzles. Use nir_ssa_alu_instr_num_src_components() to do the right thing. No piglit regressions, and no fixes either. 
Reviewed-by: Ian Romanick Reviewed-by: Jason Ekstrand Signed-off-by: Connor Abbott --- src/glsl/nir/nir_opt_cse.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c index 9b38320..db873ce 100644 --- a/src/glsl/nir/nir_opt_cse.c +++ b/src/glsl/nir/nir_opt_cse.c @@ -37,20 +37,18 @@ struct cse_state { }; static bool -nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask) +nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src) { - if (src1.abs != src2.abs || src1.negate != src2.negate) + if (alu1->src[src].abs != alu2->src[src].abs || + alu1->src[src].negate != alu2->src[src].negate) return false; - for (int i = 0; i < 4; ++i) { - if (!(read_mask & (1 << i))) - continue; - - if (src1.swizzle[i] != src2.swizzle[i]) + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src); i++) { + if (alu1->src[src].swizzle[i] != alu2->src[src].swizzle[i]) return false; } - return nir_srcs_equal(src1.src, src2.src); + return nir_srcs_equal(alu1->src[src].src, alu2->src[src].src); } static bool @@ -74,8 +72,7 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) return false; for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { - if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i], - (1 << alu1->dest.dest.ssa.num_components) - 1)) + if (!nir_alu_srcs_equal(alu1, alu2, i)) return false; } return true; From idr at kemper.freedesktop.org Tue Apr 14 23:42:36 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Tue, 14 Apr 2015 16:42:36 -0700 (PDT) Subject: Mesa (master): nir: Silence unused parameter warnings Message-ID: <20150414234236.30C8C761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 67a8610caff15ba9f55ac5ee2404f19033b5bae6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=67a8610caff15ba9f55ac5ee2404f19033b5bae6 Author: Ian Romanick Date: Mon Apr 13 16:42:59 2015 -0700 nir: Silence unused parameter 
warnings nir/nir.h: In function 'nir_validate_shader': nir/nir.h:1567:56: warning: unused parameter 'shader' [-Wunused-parameter] static inline void nir_validate_shader(nir_shader *shader) { } ^ nir/nir_opt_cse.c: In function 'src_is_ssa': nir/nir_opt_cse.c:165:32: warning: unused parameter 'data' [-Wunused-parameter] src_is_ssa(nir_src *src, void *data) ^ nir/nir_opt_cse.c: In function 'dest_is_ssa': nir/nir_opt_cse.c:171:35: warning: unused parameter 'data' [-Wunused-parameter] dest_is_ssa(nir_dest *dest, void *data) ^ Signed-off-by: Ian Romanick Reviewed-by: Jordan Justen Reviewed-by: Connor Abbott --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_opt_cse.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 2d1d870..74772c7 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1570,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); #else -static inline void nir_validate_shader(nir_shader *shader) { } +static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c index db873ce..56d491c 100644 --- a/src/glsl/nir/nir_opt_cse.c +++ b/src/glsl/nir/nir_opt_cse.c @@ -151,12 +151,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) static bool src_is_ssa(nir_src *src, void *data) { + (void) data; return src->is_ssa; } static bool dest_is_ssa(nir_dest *dest, void *data) { + (void) data; return dest->is_ssa; } From idr at kemper.freedesktop.org Tue Apr 14 23:42:36 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Tue, 14 Apr 2015 16:42:36 -0700 (PDT) Subject: Mesa (master): nir: Fix typo in "ushr by 0" algebraic replacement Message-ID: <20150414234236.39145761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 
bc672e261c5f7ff56cd2b8f6b518ebfdc0163bb7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc672e261c5f7ff56cd2b8f6b518ebfdc0163bb7 Author: Ian Romanick Date: Tue Apr 14 08:40:22 2015 -0700 nir: Fix typo in "ushr by 0" algebraic replacement Signed-off-by: Ian Romanick Reviewed-by: Jordan Justen Reviewed-by: Jason Ekstrand Reviewed-by: Connor Abbott Cc: "10.5" --- src/glsl/nir/nir_opt_algebraic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 319732d..cdb1924 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -133,7 +133,7 @@ optimizations = [ (('ishr', 0, a), 0), (('ishr', a, 0), a), (('ushr', 0, a), 0), - (('ushr', a, 0), 0), + (('ushr', a, 0), a), # Exponential/logarithmic identities (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a (('fexp', ('flog', a)), a), # e^ln(a) = a From samuelig at kemper.freedesktop.org Wed Apr 15 06:12:02 2015 From: samuelig at kemper.freedesktop.org (Samuel Iglesias Gonsálvez) Date: Tue, 14 Apr 2015 23:12:02 -0700 (PDT) Subject: Mesa (master): glsl: fix assignment of multiple scalar and vecs to matrices. Message-ID: <20150415061202.896DB761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3cbefe3cf4c745c7c681cfc18a1e47461fec91db URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3cbefe3cf4c745c7c681cfc18a1e47461fec91db Author: Samuel Iglesias Gonsalvez Date: Thu Nov 27 09:39:05 2014 +0100 glsl: fix assignment of multiple scalar and vecs to matrices. When a vec has more elements than row components in a matrix, the code could end up failing an assert inside assign_to_matrix_column(). This patch makes sure that when there is still room in the matrix for more elements (but in other columns of the matrix), the data is actually assigned. 
This patch fixes the following dEQP test: dEQP-GLES3.functional.shaders.conversions.matrix_combine.float_bvec4_ivec2_bool_to_mat4x2_vertex dEQP-GLES3.functional.shaders.conversions.matrix_combine.float_bvec4_ivec2_bool_to_mat4x2_fragment Signed-off-by: Samuel Iglesias Gonsalvez Reviewed-by: Ben Widawsky --- src/glsl/ast_function.cpp | 110 ++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 61 deletions(-) diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 36a0d19..87df93e 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -1370,71 +1370,59 @@ emit_inline_matrix_constructor(const glsl_type *type, } else { const unsigned cols = type->matrix_columns; const unsigned rows = type->vector_elements; + unsigned remaining_slots = rows * cols; unsigned col_idx = 0; unsigned row_idx = 0; foreach_in_list(ir_rvalue, rhs, parameters) { - const unsigned components_remaining_this_column = rows - row_idx; - unsigned rhs_components = rhs->type->components(); - unsigned rhs_base = 0; - - /* Since the parameter might be used in the RHS of two assignments, - * generate a temporary and copy the paramter there. - */ - ir_variable *rhs_var = - new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); - instructions->push_tail(rhs_var); - - ir_dereference *rhs_var_ref = - new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); - instructions->push_tail(inst); - - /* Assign the current parameter to as many components of the matrix - * as it will fill. - * - * NOTE: A single vector parameter can span two matrix columns. A - * single vec4, for example, can completely fill a mat2. 
- */ - if (rhs_components >= components_remaining_this_column) { - const unsigned count = MIN2(rhs_components, - components_remaining_this_column); - - rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - - ir_instruction *inst = assign_to_matrix_column(var, col_idx, - row_idx, - rhs_var_ref, 0, - count, ctx); - instructions->push_tail(inst); - - rhs_base = count; - - col_idx++; - row_idx = 0; - } - - /* If there is data left in the parameter and components left to be - * set in the destination, emit another assignment. It is possible - * that the assignment could be of a vec4 to the last element of the - * matrix. In this case col_idx==cols, but there is still data - * left in the source parameter. Obviously, don't emit an assignment - * to data outside the destination matrix. - */ - if ((col_idx < cols) && (rhs_base < rhs_components)) { - const unsigned count = rhs_components - rhs_base; - - rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - - ir_instruction *inst = assign_to_matrix_column(var, col_idx, - row_idx, - rhs_var_ref, - rhs_base, - count, ctx); - instructions->push_tail(inst); - - row_idx += count; - } + unsigned rhs_components = rhs->type->components(); + unsigned rhs_base = 0; + + if (remaining_slots == 0) + break; + + /* Since the parameter might be used in the RHS of two assignments, + * generate a temporary and copy the paramter there. + */ + ir_variable *rhs_var = + new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); + instructions->push_tail(rhs_var); + + ir_dereference *rhs_var_ref = + new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); + instructions->push_tail(inst); + + do { + /* Assign the current parameter to as many components of the matrix + * as it will fill. + * + * NOTE: A single vector parameter can span two matrix columns. A + * single vec4, for example, can completely fill a mat2. 
+ */ + unsigned count = MIN2(rows - row_idx, + rhs_components - rhs_base); + + rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); + ir_instruction *inst = assign_to_matrix_column(var, col_idx, + row_idx, + rhs_var_ref, + rhs_base, + count, ctx); + instructions->push_tail(inst); + rhs_base += count; + row_idx += count; + remaining_slots -= count; + + /* Sometimes, there is still data left in the parameters and + * components left to be set in the destination but in other + * column. + */ + if (row_idx >= rows) { + row_idx = 0; + col_idx++; + } + } while(remaining_slots > 0 && rhs_base < rhs_components); } } From jrfonseca at kemper.freedesktop.org Wed Apr 15 08:59:47 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Wed, 15 Apr 2015 01:59:47 -0700 (PDT) Subject: Mesa (master): mesa: Enable _mesa_dlopen on MSVC too. Message-ID: <20150415085947.AB799761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6635fb6cae702b195e9912747b5a11c41970ab9e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6635fb6cae702b195e9912747b5a11c41970ab9e Author: Jose Fonseca Date: Tue Apr 14 14:25:06 2015 +0100 mesa: Enable _mesa_dlopen on MSVC too. As pointed out by Shervin Sharifi. 
Reviewed-by: Brian Paul --- src/mesa/main/dlopen.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/dlopen.h b/src/mesa/main/dlopen.h index 1e77849..4d20ff2 100644 --- a/src/mesa/main/dlopen.h +++ b/src/mesa/main/dlopen.h @@ -50,7 +50,7 @@ _mesa_dlopen(const char *libname, int flags) #if defined(HAVE_DLOPEN) flags = RTLD_LAZY | RTLD_GLOBAL; /* Overriding flags at this time */ return dlopen(libname, flags); -#elif defined(__MINGW32__) +#elif defined(_WIN32) return LoadLibraryA(libname); #else return NULL; @@ -71,7 +71,7 @@ _mesa_dlsym(void *handle, const char *fname) } u; #if defined(HAVE_DLOPEN) u.v = dlsym(handle, fname); -#elif defined(__MINGW32__) +#elif defined(_WIN32) u.v = (void *) GetProcAddress(handle, fname); #else u.v = NULL; @@ -87,7 +87,7 @@ _mesa_dlclose(void *handle) { #if defined(HAVE_DLOPEN) dlclose(handle); -#elif defined(__MINGW32__) +#elif defined(_WIN32) FreeLibrary(handle); #else (void) handle; From jrfonseca at kemper.freedesktop.org Wed Apr 15 08:59:47 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Wed, 15 Apr 2015 01:59:47 -0700 (PDT) Subject: Mesa (master): st/wgl: Couple of fixes to opengl32.dll's wglCreateContext/ wglDeleteContext dispatch. Message-ID: <20150415085947.B1398761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1aa50339d816c5d5ad3107673c315569ce9b21d3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1aa50339d816c5d5ad3107673c315569ce9b21d3 Author: Jose Fonseca Date: Tue Apr 14 14:25:21 2015 +0100 st/wgl: Couple of fixes to opengl32.dll's wglCreateContext/wglDeleteContext dispatch. - Use GetModuleHandle instead of LoadLibrary to avoid incrementing the opengl32.dll reference count (otherwise the opengl32.dll will linger in memory forever.) - Ensure we use our fake wglCreateContext/wglDeleteContext when using Mesa as a drop-in replacement for opengl32.dll Untested. Just noticed by accident. 
Reviewed-by: Brian Paul --- src/gallium/state_trackers/wgl/stw_ext_context.c | 18 +++++------ src/gallium/state_trackers/wgl/stw_ext_context.h | 36 ++++++++++++++++++++++ src/gallium/state_trackers/wgl/stw_wgl.c | 21 +++++++++++++ 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/src/gallium/state_trackers/wgl/stw_ext_context.c b/src/gallium/state_trackers/wgl/stw_ext_context.c index 8a96cac..6af2062 100644 --- a/src/gallium/state_trackers/wgl/stw_ext_context.c +++ b/src/gallium/state_trackers/wgl/stw_ext_context.c @@ -33,6 +33,11 @@ #include "stw_icd.h" #include "stw_context.h" #include "stw_device.h" +#include "stw_ext_context.h" + + +wglCreateContext_t wglCreateContext_func = 0; +wglDeleteContext_t wglDeleteContext_func = 0; /** @@ -50,12 +55,7 @@ HGLRC WINAPI wglCreateContextAttribsARB(HDC hDC, HGLRC hShareContext, const int *attribList) { - typedef HGLRC (WINAPI *wglCreateContext_t)(HDC hdc); - typedef BOOL (WINAPI *wglDeleteContext_t)(HGLRC hglrc); HGLRC context; - static HMODULE opengl_lib = 0; - static wglCreateContext_t wglCreateContext_func = 0; - static wglDeleteContext_t wglDeleteContext_func = 0; int majorVersion = 1, minorVersion = 0, layerPlane = 0; int contextFlags = 0x0; @@ -135,11 +135,11 @@ wglCreateContextAttribsARB(HDC hDC, HGLRC hShareContext, const int *attribList) } /* Get pointer to OPENGL32.DLL's wglCreate/DeleteContext() functions */ - if (opengl_lib == 0) { - /* Open the OPENGL32.DLL library */ - opengl_lib = LoadLibraryA("OPENGL32.DLL"); + if (!wglCreateContext_func || !wglDeleteContext_func) { + /* Get the OPENGL32.DLL library */ + HMODULE opengl_lib = GetModuleHandleA("opengl32.dll"); if (!opengl_lib) { - _debug_printf("wgl: LoadLibrary(OPENGL32.DLL) failed\n"); + _debug_printf("wgl: GetModuleHandleA(\"opengl32.dll\") failed\n"); return 0; } diff --git a/src/gallium/state_trackers/wgl/stw_ext_context.h b/src/gallium/state_trackers/wgl/stw_ext_context.h new file mode 100644 index 0000000..9cb12b4 --- /dev/null +++ 
b/src/gallium/state_trackers/wgl/stw_ext_context.h @@ -0,0 +1,36 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2011 Morgan Armand + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +#include +#include + + +typedef HGLRC (WINAPI *wglCreateContext_t)(HDC hdc); +typedef BOOL (WINAPI *wglDeleteContext_t)(HGLRC hglrc); + +extern wglCreateContext_t wglCreateContext_func; +extern wglDeleteContext_t wglDeleteContext_func; + diff --git a/src/gallium/state_trackers/wgl/stw_wgl.c b/src/gallium/state_trackers/wgl/stw_wgl.c index 0650fbb..5146e6a 100644 --- a/src/gallium/state_trackers/wgl/stw_wgl.c +++ b/src/gallium/state_trackers/wgl/stw_wgl.c @@ -45,8 +45,12 @@ #include "stw_context.h" #include "stw_pixelformat.h" #include "stw_wgl.h" +#include "stw_ext_context.h" +static void +overrideOpenGL32EntryPoints(void); + WINGDIAPI BOOL APIENTRY wglCopyContext( HGLRC hglrcSrc, @@ -62,6 +66,7 @@ WINGDIAPI HGLRC APIENTRY wglCreateContext( HDC hdc ) { + overrideOpenGL32EntryPoints(); return (HGLRC) DrvCreateContext(hdc); } @@ -70,6 +75,7 @@ wglCreateLayerContext( HDC hdc, int iLayerPlane ) { + overrideOpenGL32EntryPoints(); return (HGLRC) DrvCreateLayerContext( hdc, iLayerPlane ); } @@ -334,3 +340,18 @@ wglRealizeLayerPalette( return FALSE; } + + +/* When this library is used as a opengl32.dll drop-in replacement, ensure we + * use the wglCreate/Destroy entrypoints above, and not the true opengl32.dll, + * which could happen if this library's name is not opengl32.dll exactly. 
+ * + * For example, Qt 5.4 bundles this as opengl32sw.dll: + * https://blog.qt.io/blog/2014/11/27/qt-weekly-21-dynamic-opengl-implementation-loading-in-qt-5-4/ + */ +static void +overrideOpenGL32EntryPoints(void) +{ + wglCreateContext_func = &wglCreateContext; + wglDeleteContext_func = &wglDeleteContext; +} From brianp at kemper.freedesktop.org Wed Apr 15 23:23:27 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 15 Apr 2015 16:23:27 -0700 (PDT) Subject: Mesa (master): tgsi: also dump label for TGSI_OPCODE_BGNSUB opcode Message-ID: <20150415232327.2A86C761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 11bfee4c3a9f285f4cd5467dac1af5f7f0dfa307 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11bfee4c3a9f285f4cd5467dac1af5f7f0dfa307 Author: Brian Paul Date: Thu Apr 9 13:45:18 2015 -0600 tgsi: also dump label for TGSI_OPCODE_BGNSUB opcode So we can see the label associated with subroutines. Reviewed-by: José Fonseca --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 7ae4049..13d6769 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -606,6 +606,7 @@ iter_instruction( case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_ENDLOOP: case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BGNSUB: TXT( " :" ); UID( inst->Label.Label ); break; From brianp at kemper.freedesktop.org Wed Apr 15 23:23:27 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 15 Apr 2015 16:23:27 -0700 (PDT) Subject: Mesa (master): mesa: add _mesa_log(), _mesa_get_log_file() functions Message-ID: <20150415232327.322D0761ED@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2926bbfb280b2eb195cc031991c956da38d89508 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2926bbfb280b2eb195cc031991c956da38d89508 Author: Brian Paul Date: Tue Apr 14 10:24:22 2015 -0600 mesa: add _mesa_log(), 
_mesa_get_log_file() functions _mesa_log() simply writes log information to stderr or MESA_LOG_FILE. _mesa_get_log_file() returns the file handle to use for logging. This will be used for shader dumping/logging instead of always printing to stderr. Reviewed-by: José Fonseca --- src/mesa/main/errors.c | 43 ++++++++++++++++++++++++++++++++++++------- src/mesa/main/errors.h | 7 +++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c index 8ffbf41..2aa1deb 100644 --- a/src/mesa/main/errors.c +++ b/src/mesa/main/errors.c @@ -1232,12 +1232,14 @@ _mesa_free_errors_data(struct gl_context *ctx) /** \name Diagnostics */ /*@{*/ +static FILE *LogFile = NULL; + + static void output_if_debug(const char *prefixString, const char *outputString, GLboolean newline) { static int debug = -1; - static FILE *fout = NULL; /* Init the local 'debug' var once. * Note: the _mesa_init_debug() function should have been called @@ -1249,9 +1251,9 @@ output_if_debug(const char *prefixString, const char *outputString, */ const char *logFile = getenv("MESA_LOG_FILE"); if (logFile) - fout = fopen(logFile, "w"); - if (!fout) - fout = stderr; + LogFile = fopen(logFile, "w"); + if (!LogFile) + LogFile = stderr; #ifdef DEBUG /* in debug builds, print messages unless MESA_DEBUG="silent" */ if (MESA_DEBUG_FLAGS & DEBUG_SILENT) @@ -1266,10 +1268,13 @@ output_if_debug(const char *prefixString, const char *outputString, /* Now only print the string if we're required to do so. 
*/ if (debug) { - fprintf(fout, "%s: %s", prefixString, outputString); + if (prefixString) + fprintf(LogFile, "%s: %s", prefixString, outputString); + else + fprintf(LogFile, "%s", outputString); if (newline) - fprintf(fout, "\n"); - fflush(fout); + fprintf(LogFile, "\n"); + fflush(LogFile); #if defined(_WIN32) /* stderr from windows applications without console is not usually @@ -1285,6 +1290,18 @@ output_if_debug(const char *prefixString, const char *outputString, /** + * Return the file handle to use for debug/logging. Defaults to stderr + * unless MESA_LOG_FILE is defined. + */ +FILE * +_mesa_get_log_file(void) +{ + assert(LogFile); + return LogFile; +} + + +/** * When a new type of error is recorded, print a message describing * previous errors which were accumulated. */ @@ -1525,6 +1542,18 @@ _mesa_debug( const struct gl_context *ctx, const char *fmtString, ... ) } +void +_mesa_log(const char *fmtString, ...) +{ + char s[MAX_DEBUG_MESSAGE_LENGTH]; + va_list args; + va_start(args, fmtString); + _mesa_vsnprintf(s, MAX_DEBUG_MESSAGE_LENGTH, fmtString, args); + va_end(args); + output_if_debug("", s, GL_FALSE); +} + + /** * Report debug information from the shader compiler via GL_ARB_debug_output. * diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h index 0c521c0..e6dc9b5 100644 --- a/src/mesa/main/errors.h +++ b/src/mesa/main/errors.h @@ -36,6 +36,7 @@ #define ERRORS_H +#include #include "compiler.h" #include "glheader.h" #include "mtypes.h" @@ -69,6 +70,12 @@ extern void _mesa_debug( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3); extern void +_mesa_log(const char *fmtString, ...) 
PRINTFLIKE(1, 2); + +extern FILE * +_mesa_get_log_file(void); + +extern void _mesa_gl_debug(struct gl_context *ctx, GLuint *id, enum mesa_debug_source source, From brianp at kemper.freedesktop.org Wed Apr 15 23:23:27 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 15 Apr 2015 16:23:27 -0700 (PDT) Subject: Mesa (master): mesa: log shaders, GLSL info log with _mesa_log() Message-ID: <20150415232327.3A8DA761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 011cad806a6e42caecfef0e9fc77c6db0333dcfe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=011cad806a6e42caecfef0e9fc77c6db0333dcfe Author: Brian Paul Date: Tue Apr 14 10:41:56 2015 -0600 mesa: log shaders, GLSL info log with _mesa_log() Now, if we set MESA_LOG_FILE and MESA_GLSL=dump, all the shader info will get logged to the named file instead of stderr. Reviewed-by: José Fonseca --- src/mesa/main/shaderapi.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 30716f5..77e2b87 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -882,10 +882,9 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj) sh->CompileStatus = GL_FALSE; } else { if (ctx->_Shader->Flags & GLSL_DUMP) { - fprintf(stderr, "GLSL source for %s shader %d:\n", + _mesa_log("GLSL source for %s shader %d:\n", _mesa_shader_stage_to_string(sh->Stage), sh->Name); - fprintf(stderr, "%s\n", sh->Source); - fflush(stderr); + _mesa_log("%s\n", sh->Source); } /* this call will set the shader->CompileStatus field to indicate if @@ -899,27 +898,25 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj) if (ctx->_Shader->Flags & GLSL_DUMP) { if (sh->CompileStatus) { - fprintf(stderr, "GLSL IR for shader %d:\n", sh->Name); - _mesa_print_ir(stderr, sh->ir, NULL); - fprintf(stderr, "\n\n"); + _mesa_log("GLSL IR for shader %d:\n", sh->Name); + _mesa_print_ir(_mesa_get_log_file(), sh->ir, NULL); + 
_mesa_log("\n\n"); + } else { - fprintf(stderr, "GLSL shader %d failed to compile.\n", sh->Name); + _mesa_log("GLSL shader %d failed to compile.\n", sh->Name); } if (sh->InfoLog && sh->InfoLog[0] != 0) { - fprintf(stderr, "GLSL shader %d info log:\n", sh->Name); - fprintf(stderr, "%s\n", sh->InfoLog); + _mesa_log("GLSL shader %d info log:\n", sh->Name); + _mesa_log("%s\n", sh->InfoLog); } - fflush(stderr); } } if (!sh->CompileStatus) { if (ctx->_Shader->Flags & GLSL_DUMP_ON_ERROR) { - fprintf(stderr, "GLSL source for %s shader %d:\n", + _mesa_log("GLSL source for %s shader %d:\n", _mesa_shader_stage_to_string(sh->Stage), sh->Name); - fprintf(stderr, "%s\n", sh->Source); - fprintf(stderr, "Info Log:\n%s\n", sh->InfoLog); - fflush(stderr); + _mesa_log("%s\n", sh->Source); + _mesa_log("Info Log:\n%s\n", sh->InfoLog); } if (ctx->_Shader->Flags & GLSL_REPORT_ERRORS) { From brianp at kemper.freedesktop.org Wed Apr 15 23:23:27 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 15 Apr 2015 16:23:27 -0700 (PDT) Subject: Mesa (master): st/mesa: log shaders, GLSL info log with _mesa_log() Message-ID: <20150415232327.4600E761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e1d095053b2b50109f77fd9eb524e8e1c7d025af URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1d095053b2b50109f77fd9eb524e8e1c7d025af Author: Brian Paul Date: Tue Apr 14 10:54:34 2015 -0600 st/mesa: log shaders, GLSL info log with _mesa_log() As with previous patch. Reviewed-by: José 
Fonseca --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index c05a456..04258a1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5532,14 +5532,12 @@ get_mesa_program(struct gl_context *ctx, v->emit(NULL, TGSI_OPCODE_END); if (ctx->_Shader->Flags & GLSL_DUMP) { - printf("\n"); - printf("GLSL IR for linked %s program %d:\n", + _mesa_log("\n"); + _mesa_log("GLSL IR for linked %s program %d:\n", _mesa_shader_stage_to_string(shader->Stage), shader_program->Name); - _mesa_print_ir(stdout, shader->ir, NULL); - printf("\n"); - printf("\n"); - fflush(stdout); + _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL); + _mesa_log("\n\n"); } prog->Instructions = NULL; From anholt at kemper.freedesktop.org Thu Apr 16 00:22:46 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 15 Apr 2015 17:22:46 -0700 (PDT) Subject: Mesa (master): vc4: Hook up VC4_DEBUG=perf to some useful printfs. Message-ID: <20150416002246.47EF3761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bd957b1b79124c5061af1eddf16932793e806d87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd957b1b79124c5061af1eddf16932793e806d87 Author: Eric Anholt Date: Tue Apr 14 11:24:00 2015 -0700 vc4: Hook up VC4_DEBUG=perf to some useful printfs. --- src/gallium/drivers/vc4/vc4_context.h | 5 +++++ src/gallium/drivers/vc4/vc4_draw.c | 7 ++++++- src/gallium/drivers/vc4/vc4_resource.c | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 68eacb8..d89f197 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -303,6 +303,11 @@ struct vc4_depth_stencil_alpha_state { uint32_t stencil_uniforms[3]; }; +#define perf_debug(...) 
do { \ + if (unlikely(vc4_debug & VC4_DEBUG_PERF)) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + static inline struct vc4_context * vc4_context(struct pipe_context *pcontext) { diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 3a6d625..717eb8a 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -22,6 +22,7 @@ * IN THE SOFTWARE. */ +#include "util/u_prim.h" #include "util/u_format.h" #include "util/u_pack_color.h" #include "indices/u_primconvert.h" @@ -139,6 +140,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf); util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base); util_primconvert_draw_vbo(vc4->primconvert, info); + perf_debug("Fallback conversion for %d %s vertices\n", + info->count, u_prim_name(info->mode)); return; } @@ -303,8 +306,10 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers, /* We can't flag new buffers for clearing once we've queued draws. We * could avoid this by using the 3d engine to clear. */ - if (vc4->draw_call_queued) + if (vc4->draw_call_queued) { + perf_debug("Flushing rendering to process new clear."); vc4_flush(pctx); + } if (buffers & PIPE_CLEAR_COLOR0) { vc4->clear_color[0] = vc4->clear_color[1] = diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index f6ca075..94bab99 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -586,6 +586,9 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, if (shadow->writes == orig->writes) return; + perf_debug("Updating shadow texture due to %s\n", + view->u.tex.first_level ? 
"base level" : "raster layout"); + for (int i = 0; i <= shadow->base.b.last_level; i++) { unsigned width = u_minify(shadow->base.b.width0, i); unsigned height = u_minify(shadow->base.b.height0, i); @@ -646,6 +649,8 @@ vc4_update_shadow_index_buffer(struct pipe_context *pctx, if (shadow->writes == orig->writes) return; + perf_debug("Fallback conversion for %d uint indices\n", count); + struct pipe_transfer *src_transfer; uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, ib->offset, From anholt at kemper.freedesktop.org Thu Apr 16 00:22:46 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 15 Apr 2015 17:22:46 -0700 (PDT) Subject: Mesa (master): vc4: Don't try to use color load/stores to do depth/ stencil blits. Message-ID: <20150416002246.5A6E4761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cff2e08c4cb87b7c2e19100e24c336e50b9839cc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cff2e08c4cb87b7c2e19100e24c336e50b9839cc Author: Eric Anholt Date: Tue Apr 14 21:59:46 2015 -0700 vc4: Don't try to use color load/stores to do depth/stencil blits. Fixes regressions in fbo-generatemipmap-formats on depth/stencil (which does blits to work around baselevel/lastlevel). 
--- src/gallium/drivers/vc4/vc4_blit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index 4f87189..3204c3d 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -125,6 +125,9 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) { struct vc4_context *vc4 = vc4_context(pctx); + if (util_format_is_depth_or_stencil(info->dst.resource->format)) + return false; + if ((info->mask & PIPE_MASK_RGBA) == 0) return false; From anholt at kemper.freedesktop.org Thu Apr 16 00:22:46 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 15 Apr 2015 17:22:46 -0700 (PDT) Subject: Mesa (master): vc4: Don't try to use color load/ stores to blit across format changes. Message-ID: <20150416002246.63DFC761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b229e6c7dee2bb6b1736d6867790dfcd1c50f623 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b229e6c7dee2bb6b1736d6867790dfcd1c50f623 Author: Eric Anholt Date: Tue Apr 14 22:01:55 2015 -0700 vc4: Don't try to use color load/stores to blit across format changes. We could potentially support the right combination of 8888 to 565, but the important thing for now is to not mix up our orderings of 8888. Fixes fbo-copyteximage regressions. 
--- src/gallium/drivers/vc4/vc4_blit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c index 3204c3d..2d524c4 100644 --- a/src/gallium/drivers/vc4/vc4_blit.c +++ b/src/gallium/drivers/vc4/vc4_blit.c @@ -138,6 +138,9 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) return false; } + if (info->dst.resource->format != info->src.resource->format) + return false; + struct vc4_surface *dst_surf = vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); struct vc4_surface *src_surf = From anholt at kemper.freedesktop.org Thu Apr 16 00:22:46 2015 From: anholt at kemper.freedesktop.org (Eric Anholt) Date: Wed, 15 Apr 2015 17:22:46 -0700 (PDT) Subject: Mesa (master): vc4: Update the shadow texture for public textures on every draw. Message-ID: <20150416002246.512A8761ED@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3a728d4dfbd727c30f36116772803674beffcbb6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a728d4dfbd727c30f36116772803674beffcbb6 Author: Eric Anholt Date: Tue Apr 14 11:31:11 2015 -0700 vc4: Update the shadow texture for public textures on every draw. We don't know who else has written to it, so we'd better update it every time. This makes the gears spin in X again. 
--- src/gallium/drivers/vc4/vc4_draw.c | 18 ++++++++++++++++++ src/gallium/drivers/vc4/vc4_resource.c | 2 +- src/gallium/drivers/vc4/vc4_state.c | 7 +------ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 717eb8a..16418bf 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -132,6 +132,20 @@ vc4_start_draw(struct vc4_context *vc4) } static void +vc4_update_shadow_textures(struct pipe_context *pctx, + struct vc4_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) { + struct pipe_sampler_view *view = stage_tex->textures[i]; + if (!view) + continue; + struct vc4_resource *rsc = vc4_resource(view->texture); + if (rsc->shadow_parent) + vc4_update_shadow_baselevel_texture(pctx, view); + } +} + +static void vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) { struct vc4_context *vc4 = vc4_context(pctx); @@ -145,6 +159,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) return; } + /* Before setting up the draw, do any fixup blits necessary. 
*/ + vc4_update_shadow_textures(pctx, &vc4->verttex); + vc4_update_shadow_textures(pctx, &vc4->fragtex); + vc4_get_draw_cl_space(vc4); struct vc4_vertex_stateobj *vtx = vc4->vtx; diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 94bab99..3f180d5 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -583,7 +583,7 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct vc4_resource *orig = vc4_resource(shadow->shadow_parent); assert(orig); - if (shadow->writes == orig->writes) + if (shadow->writes == orig->writes && orig->bo->private) return; perf_debug("Updating shadow texture due to %s\n", diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index df75b6e..80e963e 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -578,13 +578,8 @@ vc4_set_sampler_views(struct pipe_context *pctx, unsigned shader, vc4->dirty |= VC4_DIRTY_TEXSTATE; for (i = 0; i < nr; i++) { - if (views[i]) { - struct vc4_resource *rsc = - vc4_resource(views[i]->texture); + if (views[i]) new_nr = i + 1; - if (rsc->shadow_parent) - vc4_update_shadow_baselevel_texture(pctx, views[i]); - } pipe_sampler_view_reference(&stage_tex->textures[i], views[i]); stage_tex->dirty_samplers |= (1 << i); } From idr at kemper.freedesktop.org Thu Apr 16 01:15:42 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Wed, 15 Apr 2015 18:15:42 -0700 (PDT) Subject: Mesa (master): glx: Create proper server dependency for GLX_EXT_create_context_es2_profile Message-ID: <20150416011542.2434A761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8957c9e448670e5aa78065619692cf285f9b0a30 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8957c9e448670e5aa78065619692cf285f9b0a30 Author: Ian Romanick Date: Tue Apr 14 09:24:06 2015 -0700 glx: Create proper server dependency for GLX_EXT_create_context_es2_profile Previously 
GLX_EXT_create_context_es2_profile was marked as "direct only" so that it would not depend on server support. Since the extension required functions that are part of GLX_ARB_create_context_profile, support for the EXT was disabled if the ARB was not supported. This was complete rubbish. If the server supported the ARB but not the EXT, sending a request with GLX_CONTEXT_ES2_PROFILE_BIT_EXT would result in GLXBadProfileARB. Instead of the misguided hack, make GLX_EXT_create_context_es2_profile properly depend on server support by not marking it as "direct only." Signed-off-by: Ian Romanick Acked-by: José Fonseca Reviewed-by: Chad Versace Cc: Emil Velikov --- src/glx/glxextensions.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index ce5d66d..a326f0d 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -84,7 +84,7 @@ static const struct extension_info known_glx_extensions[] = { { GLX(EXT_visual_rating), VER(0,0), Y, Y, N, N }, { GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N }, { GLX(EXT_framebuffer_sRGB), VER(0,0), Y, Y, N, N }, - { GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, Y }, + { GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N }, { GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N }, { GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N }, { GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y }, @@ -627,16 +627,6 @@ __glXCalculateUsableExtensions(struct glx_screen * psc, } } - /* This hack is necessary because GLX_ARB_create_context_profile depends on - * server support, but GLX_EXT_create_context_es2_profile is direct-only. - * Without this hack, it would be possible to advertise - * GLX_EXT_create_context_es2_profile without - * GLX_ARB_create_context_profile. That would be a problem. 
- */ - if (!IS_SET(server_support, ARB_create_context_profile_bit)) { - CLR_BIT(usable, EXT_create_context_es2_profile_bit); - } - psc->effectiveGLXexts = __glXGetStringFromTable(known_glx_extensions, usable); } From idr at kemper.freedesktop.org Thu Apr 16 01:16:32 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Wed, 15 Apr 2015 18:16:32 -0700 (PDT) Subject: Mesa (master): nir: Try commutative sources in CSE Message-ID: <20150416011632.51686761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4cf5ca5ca5e70755723f7f0ced77c168d9e3a86f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4cf5ca5ca5e70755723f7f0ced77c168d9e3a86f Author: Ian Romanick Date: Mon Apr 13 16:16:10 2015 -0700 nir: Try commutative sources in CSE Shader-db results: GM45 NIR: total instructions in shared programs: 4082044 -> 4081919 (-0.00%) instructions in affected programs: 27609 -> 27484 (-0.45%) helped: 44 Iron Lake NIR: total instructions in shared programs: 5678776 -> 5678646 (-0.00%) instructions in affected programs: 27406 -> 27276 (-0.47%) helped: 45 Sandy Bridge NIR: total instructions in shared programs: 7329995 -> 7329096 (-0.01%) instructions in affected programs: 142035 -> 141136 (-0.63%) helped: 406 HURT: 19 Ivy Bridge NIR: total instructions in shared programs: 6769314 -> 6768359 (-0.01%) instructions in affected programs: 140820 -> 139865 (-0.68%) helped: 423 HURT: 2 Haswell NIR: total instructions in shared programs: 6183693 -> 6183298 (-0.01%) instructions in affected programs: 96538 -> 96143 (-0.41%) helped: 303 HURT: 4 Broadwell NIR: total instructions in shared programs: 7501711 -> 7498170 (-0.05%) instructions in affected programs: 266403 -> 262862 (-1.33%) helped: 705 HURT: 5 GAINED: 4 v2: Rebase on top of Connor's fix. v3: Convert the if-test for num_inputs == 2 to an assertion. Suggested by Jason after some comments / questions by Ilia. 
Signed-off-by: Ian Romanick Reviewed-by: Jordan Justen [v1] Reviewed-by: Jason Ekstrand Cc: Connor Abbott --- src/glsl/nir/nir_opt_cse.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c index 56d491c..553906e 100644 --- a/src/glsl/nir/nir_opt_cse.c +++ b/src/glsl/nir/nir_opt_cse.c @@ -37,18 +37,19 @@ struct cse_state { }; static bool -nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src) +nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1, + unsigned src2) { - if (alu1->src[src].abs != alu2->src[src].abs || - alu1->src[src].negate != alu2->src[src].negate) + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) return false; - for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src); i++) { - if (alu1->src[src].swizzle[i] != alu2->src[src].swizzle[i]) + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) return false; } - return nir_srcs_equal(alu1->src[src].src, alu2->src[src].src); + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); } static bool @@ -71,9 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2) if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) return false; - for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { - if (!nir_alu_srcs_equal(alu1, alu2, i)) - return false; + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + 
return false; + } } return true; } From airlied at kemper.freedesktop.org Thu Apr 16 02:43:45 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Wed, 15 Apr 2015 19:43:45 -0700 (PDT) Subject: Mesa (master): r600g/sb: Skip empty ALU clause while scheduling Message-ID: <20150416024345.943EA761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 17d69862a9232e2bcdfa032c5a65c27557dd9275 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=17d69862a9232e2bcdfa032c5a65c27557dd9275 Author: Glenn Kennard Date: Wed Apr 8 11:30:37 2015 +0200 r600g/sb: Skip empty ALU clause while scheduling Fixes assert triggered by ext_transform_feedback-intervening-read output use_gs piglit test. Signed-off-by: Glenn Kennard Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/sb/sb_sched.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index 4248a3f..2e38a62 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -825,6 +825,9 @@ void post_scheduler::init_regmap() { void post_scheduler::process_alu(container_node *c) { + if (c->empty()) + return; + ucm.clear(); alu.reset(); From tpalli at kemper.freedesktop.org Thu Apr 16 05:00:10 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Wed, 15 Apr 2015 22:00:10 -0700 (PDT) Subject: Mesa (master): 24 new commits Message-ID: <20150416050010.97D40761EC@kemper.freedesktop.org> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c154bbe6080ced9607bc3f3240d2f0866018aeb Author: Tapani Pälli Date: Fri Mar 13 08:53:23 2015 +0200 mesa: refactor GetUniformBlockIndex Use _mesa_program_resource_index to get index. 
Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b256eb0ec1f919f23641f8d47d4b2ce126dd1a4 Author: Tapani Pälli Date: Thu Mar 12 14:37:20 2015 +0200 mesa: refactor GetUniformIndices Use _mesa_program_resource_index to get indices. Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=51313f567d59afd5491e03d1593030066df974eb Author: Tapani Pälli Date: Thu Mar 12 14:31:15 2015 +0200 mesa: refactor GetUniformLocation Use _mesa_program_resource_location to get location. Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=45637e9c1f47df8cc8284b39b18f6b5f2e14aae9 Author: Tapani Pälli Date: Thu Mar 12 13:33:25 2015 +0200 mesa: refactor GetActiveUniformBlockName Use _mesa_get_program_resource_name to get name. Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=284003e1f1b84f165f2cffdcec47bee972b12a8d Author: Tapani Pälli Date: Thu Mar 12 13:53:07 2015 +0200 mesa: remove unused _mesa_get_uniform_name Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8d6fa52e33183942fe93e2fad27bf4881b0a1ce0 Author: Tapani Pälli Date: Thu Mar 12 13:22:16 2015 +0200 mesa: refactor GetActiveUniformName Use _mesa_get_program_resource_name to get name. 
Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=17dc939f756cfed23574ad84764f5e2a5a9b6751 Author: Tapani Pälli Date: Thu Mar 12 12:55:18 2015 +0200 mesa: refactor GetActiveUniform Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc39d843d21898752ce5d79804ff9f638595d3a9 Author: Tapani Pälli Date: Thu Mar 12 12:08:56 2015 +0200 mesa: refactor GetTransformFeedbackVarying Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7519ddb4d8192f992c4a8b3fff84465b52905958 Author: Tapani Pälli Date: Thu Mar 12 10:17:09 2015 +0200 mesa: refactor GetActiveUniformsiv, use _mesa_program_resource_prop Signed-off-by: Tapani Pälli Reviewed-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34df5ebd778fd3027db0f380eab71a95cfa298c0 Author: Tapani Pälli Date: Thu Mar 12 07:58:48 2015 +0200 mesa: mesa_bufferiv utility function for buffer objects Patch adds new function 'mesa_bufferiv' and refactors existing GetActiveUniformBlockiv and GetActiveAtomicCounterBufferiv to use it. corresponding Piglit tests: arb_uniform_buffer_object* arb_shader_atomic_counters* (Many tests hit the corresponding queries.) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e7f134f89f52cf103e2b682faa72dac4cc2f840 Author: Tapani Pälli Date: Thu Mar 12 15:14:31 2015 +0200 mesa: refactor GetFragDataIndex Use _mesa_program_resource_location_index to fetch index. Signed-off-by: Tapani Pälli Reviewed-by: Ilia Mirkin Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62057c77f135ae7ebe5f10def714edb5492cb0be Author: Tapani Pälli Date: Thu Mar 12 15:13:30 2015 +0200 mesa: refactor GetFragDataLocation Use program_resource_location to fetch location. 
Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d1544cc91cad4cca1deb67c82fc7390fe4196f9 Author: Tapani Pälli Date: Thu Mar 12 15:11:04 2015 +0200 mesa: refactor GetAttribLocation Use program_resource_location to fetch location. Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26c0394a964f13d0266b1dcf7283bf21b7bca340 Author: Tapani Pälli Date: Wed Mar 11 09:05:47 2015 +0200 mesa: refactor GetActiveAttrib Instead of iterating IR, retrieve required information through the new program resource functions. Signed-off-by: Tapani Pälli Reviewed-by: Ilia Mirkin URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=41c230cd983195e3989153f8d82ec95e298352d4 Author: Tapani Pälli Date: Tue Apr 7 12:07:03 2015 +0300 mesa: enable GL_ARB_program_interface_query extension (and mark it as DONE in docs/GL3.txt + 10.6.0 relnotes) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2ab8de2181988870a95de23aeb906df5678e1e90 Author: Tapani Pälli Date: Tue Mar 10 13:11:14 2015 +0200 mesa: implementation of glGetProgramResourceiv Patch adds required helper functions to shaderapi.h and the actual implementation. The property query functionality can be tested with tests for following functions that are refactored by later patches: GetActiveAtomicCounterBufferiv GetActiveUniformBlockiv GetActiveUniformsiv v2: code cleanup (Ilia Mirkin) add bufSize < 0 check and error out fix is_resource_referenced to return bool check for propCount and bufSize, fixes in buffer_prop Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9367ade331e5d0a7724c595e7afb0322caaaddf7 Author: Tapani Pälli Date: Thu Mar 12 14:08:38 2015 +0200 mesa: glGetProgramResourceLocationIndex Patch adds required helper functions to shaderapi.h and the actual implementation. 
The added functionality can be tested by tests for following functions that are refactored by later patches: GetFragDataIndex v2: return -1 if output not referenced by fragment stage (Ilia Mirkin) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e0e4d77f0120865b3ca0a4055358fc87d38d1cfe Author: Tapani Pälli Date: Thu Mar 12 13:45:22 2015 +0200 mesa: glGetProgramResourceLocation Patch adds required helper functions to shaderapi.h and the actual implementation. corresponding Piglit test: arb_program_interface_query-resource-location The added functionality can be tested by tests for following functions that are refactored by later patches: GetAttribLocation GetUniformLocation GetFragDataLocation v2: code cleanup, changes to array element syntax checking (Ilia Mirkin) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2a5a0d19d67b2ccd7eee33a6f3bead66cc2d78ff Author: Tapani Pälli Date: Tue Mar 10 10:33:20 2015 +0200 mesa: glGetProgramResourceName Patch adds required helper functions to shaderapi.h and the actual implementation. Name generation copied from '_mesa_get_uniform_name' which can be removed later by refactoring functions to use resource list. The added functionality can be tested by tests for following functions that are refactored by later patches: GetActiveUniformName GetActiveUniformBlockName v2: no index for geometry shader inputs (Ilia Mirkin) add bufSize < 0 check and error out validate enum corresponding Piglit test: arb_program_interface_query-getprogramresourcename Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=161f57f6103802de55d792bcc6a4370afa5c5173 Author: Tapani Pälli Date: Tue Mar 10 09:30:30 2015 +0200 mesa: glGetProgramResourceIndex Patch adds required helper functions to shaderapi.h and the actual implementation. 
v2: code cleanup (Ilia Mirkin) corresponding Piglit test: arb_program_interface_query-getprogramresourceindex Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d3b98bc5801df27a7f9f2e3df28d66d83f883d9 Author: Tapani Pälli Date: Fri Mar 6 15:05:51 2015 +0200 mesa: glGetProgramInterfaceiv Patch adds required helper functions to shaderapi.h and the actual implementation. v2: code cleanup (Ilia Mirkin) fix array size fo xfb varyings validate programInterface and throw error v3: put GL_MAX_NUM_COMPATIBLE_SUBROUTINES where it belongs corresponding Piglit test: arb_program_interface_query-getprograminterfaceiv Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c796ce4108ccc4987c24df43606d04a0f3658d44 Author: Tapani Pälli Date: Fri Mar 6 09:14:49 2015 +0200 mesa/glsl: build list of program resources during linking Patch adds ProgramResourceList to gl_shader_program structure. List contains references to active program resources and is constructed during linking phase. This list will be used by follow-up patches to implement hooks for GL_ARB_program_interface_query. It can be also used to implement any of the older shader program query APIs. 
v2: code cleanups + note for SSBO and subroutines (Ilia Mirkin) v3: code cleanups + assert(MESA_SHADER_STAGES < 8) (Martin Peres) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b297fc27aa93c4af4cf8ecf9702fd0b95d2c4f9a Author: Tapani Pälli Date: Mon Feb 16 14:15:36 2015 +0200 glapi: add GL_ARB_program_interface_query skeleton v2: update dispatch_sanity test (Jason Ekstrand) + small code cleanups v3: xml and Makefile fixes (Ilia Mirkin, Matt Turner) Signed-off-by: Tapani Pälli Reviewed-by: Matt Turner Reviewed-by: Martin Peres URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=993b9b6adbd2c97ccb58b7cbc46382d1ae87b9ab Author: Tapani Pälli Date: Tue Mar 17 13:58:57 2015 +0200 linker: fix varying linking if SSO program has only gs and fs Previously linker did not take in to account case where one would have only gs and fs (with SSO), patch adds the case by refactoring code around assign_varying_locations. This makes sure locations for gs get populated correctly. This was found with some of the SSO subtests of Martin's upcoming GetProgramInterfaceiv Piglit test which passes with the patch, no Piglit regressions. 
v2: code cleanups (Martin Peres) Signed-off-by: Tapani Pälli Reviewed-by: Martin Peres From daenzer at kemper.freedesktop.org Thu Apr 16 06:52:00 2015 From: daenzer at kemper.freedesktop.org (Michel Dänzer) Date: Wed, 15 Apr 2015 23:52:00 -0700 (PDT) Subject: Mesa (master): gbm: Add GBM_BO_USE_LINEAR flag Message-ID: <20150416065200.AEBE7761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f78b2c432f31a18794b9f62f0a0cfb8eca256151 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f78b2c432f31a18794b9f62f0a0cfb8eca256151 Author: Flora Cui Date: Tue Mar 24 18:34:29 2015 +0800 gbm: Add GBM_BO_USE_LINEAR flag Signed-off-by: Flora Cui Reviewed-by: Jammy Zhou Reviewed-by: Michel Dänzer Reviewed-by: Alex Deucher --- src/gbm/backends/dri/gbm_dri.c | 2 ++ src/gbm/main/gbm.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index c513672..62bdf89 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -858,6 +858,8 @@ gbm_dri_bo_create(struct gbm_device *gbm, dri_use |= __DRI_IMAGE_USE_SCANOUT; if (usage & GBM_BO_USE_CURSOR) dri_use |= __DRI_IMAGE_USE_CURSOR; + if (usage & GBM_BO_USE_LINEAR) + dri_use |= __DRI_IMAGE_USE_LINEAR; /* Gallium drivers requires shared in order to get the handle/stride */ dri_use |= __DRI_IMAGE_USE_SHARE; diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h index 7b23c26..2708e50 100644 --- a/src/gbm/main/gbm.h +++ b/src/gbm/main/gbm.h @@ -209,6 +209,10 @@ enum gbm_bo_flags { * with GBM_BO_USE_CURSOR. but may not work for other combinations. */ GBM_BO_USE_WRITE = (1 << 3), + /** + * Buffer is linear, i.e. not tiled. + */ + GBM_BO_USE_LINEAR = (1 << 4), }; int From jrfonseca at kemper.freedesktop.org Thu Apr 16 09:24:14 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Thu, 16 Apr 2015 02:24:14 -0700 (PDT) Subject: Mesa (master): mesa,glsl: rename `interface` to `programInterface`. 
Message-ID: <20150416092414.6F574761EC@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 037e0e78abf0c312f737d33f3c33e37b22bf226d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=037e0e78abf0c312f737d33f3c33e37b22bf226d Author: Jose Fonseca Date: Thu Apr 16 10:19:57 2015 +0100 mesa,glsl: rename `interface` to `programInterface`. `interface` is a define on Windows -- an alias for `struct` keyword, used when declaring COM interfaces in C or C++. So use instead `programInterface`, therefore matching the name used in GL_ARB_program_interface_query spec/headers, which was renamed exactly for the same reason: "Revision 10, May 10, 2012 (pbrown) - Rename the formal parameter <interface> used by the functions in this extension to <programInterface>. Certain versions of the Microsoft C/C++ compiler and/or its headers cause "interface" to be treated as a reserved keyword." Trivial. --- src/glsl/linker.cpp | 8 ++++---- src/mesa/main/shader_query.cpp | 30 +++++++++++++++--------------- src/mesa/main/shaderapi.h | 12 ++++++------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 651ecd8..b6baa5d 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2552,7 +2552,7 @@ build_stageref(struct gl_shader_program *shProg, const char *name) static bool add_interface_variables(struct gl_shader_program *shProg, - struct gl_shader *sh, GLenum interface) + struct gl_shader *sh, GLenum programInterface) { foreach_in_list(ir_instruction, node, sh->ir) { ir_variable *var = node->as_variable(); @@ -2572,18 +2572,18 @@ add_interface_variables(struct gl_shader_program *shProg, var->data.location != SYSTEM_VALUE_INSTANCE_ID) continue; case ir_var_shader_in: - if (interface != GL_PROGRAM_INPUT) + if (programInterface != GL_PROGRAM_INPUT) continue; break; case ir_var_shader_out: - if (interface != GL_PROGRAM_OUTPUT) + if (programInterface != GL_PROGRAM_OUTPUT) continue; break; default: continue; }; - if (!add_program_resource(shProg, interface, 
var, + if (!add_program_resource(shProg, programInterface, var, build_stageref(shProg, var->name))) return false; } diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index c7062ab..b5f1d08 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -544,18 +544,18 @@ array_index_of_resource(struct gl_program_resource *res, */ struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, - GLenum interface, const char *name) + GLenum programInterface, const char *name) { struct gl_program_resource *res = shProg->ProgramResourceList; for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { - if (res->Type != interface) + if (res->Type != programInterface) continue; /* Resource basename. */ const char *rname = _mesa_program_resource_name(res); unsigned baselen = strlen(rname); - switch (interface) { + switch (programInterface) { case GL_TRANSFORM_FEEDBACK_VARYING: case GL_UNIFORM_BLOCK: case GL_UNIFORM: @@ -620,13 +620,13 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, */ struct gl_program_resource * _mesa_program_resource_find_index(struct gl_shader_program *shProg, - GLenum interface, GLuint index) + GLenum programInterface, GLuint index) { struct gl_program_resource *res = shProg->ProgramResourceList; int idx = -1; for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { - if (res->Type != interface) + if (res->Type != programInterface) continue; switch (res->Type) { @@ -653,7 +653,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg, */ bool _mesa_get_program_resource_name(struct gl_shader_program *shProg, - GLenum interface, GLuint index, + GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name, const char *caller) { @@ -661,7 +661,7 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg, /* Find resource with given interface and index. 
*/ struct gl_program_resource *res = - _mesa_program_resource_find_index(shProg, interface, index); + _mesa_program_resource_find_index(shProg, programInterface, index); /* The error INVALID_VALUE is generated if is greater than * or equal to the number of entries in the active resource list for @@ -704,7 +704,7 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg, * Note, that TCS outputs and TES inputs should not have index appended * either. */ - bool add_index = !(((interface == GL_PROGRAM_INPUT) && + bool add_index = !(((programInterface == GL_PROGRAM_INPUT) && res->StageReferences & (1 << MESA_SHADER_GEOMETRY))); if (add_index && _mesa_program_resource_array_size(res)) { @@ -777,10 +777,10 @@ program_resource_location(struct gl_shader_program *shProg, */ GLint _mesa_program_resource_location(struct gl_shader_program *shProg, - GLenum interface, const char *name) + GLenum programInterface, const char *name) { struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, interface, name); + _mesa_program_resource_find_name(shProg, programInterface, name); /* Resource not found. */ if (!res) @@ -795,10 +795,10 @@ _mesa_program_resource_location(struct gl_shader_program *shProg, */ GLint _mesa_program_resource_location_index(struct gl_shader_program *shProg, - GLenum interface, const char *name) + GLenum programInterface, const char *name) { struct gl_program_resource *res = - _mesa_program_resource_find_name(shProg, interface, name); + _mesa_program_resource_find_name(shProg, programInterface, name); /* Non-existent variable or resource is not referenced by fragment stage. 
*/ if (!res || !(res->StageReferences & (1 << MESA_SHADER_FRAGMENT))) @@ -1033,7 +1033,7 @@ invalid_operation: extern void _mesa_get_program_resourceiv(struct gl_shader_program *shProg, - GLenum interface, GLuint index, GLsizei propCount, + GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params) { @@ -1043,13 +1043,13 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg, GLsizei amount = 0; struct gl_program_resource *res = - _mesa_program_resource_find_index(shProg, interface, index); + _mesa_program_resource_find_index(shProg, programInterface, index); /* No such resource found or bufSize negative. */ if (!res || bufSize < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramResourceiv(%s index %d bufSize %d)", - _mesa_lookup_enum_by_nr(interface), index, bufSize); + _mesa_lookup_enum_by_nr(programInterface), index, bufSize); return; } diff --git a/src/mesa/main/shaderapi.h b/src/mesa/main/shaderapi.h index 0cd2fad..aba6d5d 100644 --- a/src/mesa/main/shaderapi.h +++ b/src/mesa/main/shaderapi.h @@ -232,25 +232,25 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, extern struct gl_program_resource * _mesa_program_resource_find_name(struct gl_shader_program *shProg, - GLenum interface, const char *name); + GLenum programInterface, const char *name); extern struct gl_program_resource * _mesa_program_resource_find_index(struct gl_shader_program *shProg, - GLenum interface, GLuint index); + GLenum programInterface, GLuint index); extern bool _mesa_get_program_resource_name(struct gl_shader_program *shProg, - GLenum interface, GLuint index, + GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name, const char *caller); extern GLint _mesa_program_resource_location(struct gl_shader_program *shProg, - GLenum interface, const char *name); + GLenum programInterface, const char *name); extern GLint _mesa_program_resource_location_index(struct 
gl_shader_program *shProg, - GLenum interface, const char *name); + GLenum programInterface, const char *name); extern unsigned _mesa_program_resource_prop(struct gl_shader_program *shProg, @@ -259,7 +259,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg, extern void _mesa_get_program_resourceiv(struct gl_shader_program *shProg, - GLenum interface, GLuint index, + GLenum programInterface, GLuint index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); From nroberts at kemper.freedesktop.org Thu Apr 16 12:09:17 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Thu, 16 Apr 2015 05:09:17 -0700 (PDT) Subject: Mesa (master): i965/skl: Add the header for constant loads outside of the generator Message-ID: <20150416120917.68A2D7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 33f73e93ff6e14f72153d3df7e80763137fcb943 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33f73e93ff6e14f72153d3df7e80763137fcb943 Author: Neil Roberts Date: Tue Mar 24 15:52:20 2015 +0000 i965/skl: Add the header for constant loads outside of the generator Commit 5a06ee738 added a step to the generator to set up the message header when generating the VS_OPCODE_PULL_CONSTANT_LOAD_GEN7 instruction. That pseudo opcode is implemented in terms of multiple actual opcodes, one of which writes to one of the source registers in order to set up the message header. This causes problems because the scheduler isn't aware that the source register is written to and it can end up reorganising the instructions incorrectly such that the write to the source register overwrites a needed value from a previous instruction. This problem was presenting itself as a rendering error in the weapon in Enemy Territory: Quake Wars. Since commit 588859e1 there is an additional problem that the double register allocated to include the message header would end up being split into two. 
This wasn't happening previously because the code to split registers was explicitly avoided for instructions that are sending from the GRF. This patch fixes both problems by splitting the code to set up the message header into a new pseudo opcode so that it will be done outside of the generator. This new opcode has the header register as a destination so the scheduler can recognise that the register is written to. This has the additional benefit that the scheduler can optimise the message header slightly better by moving the mov instructions further away from the send instructions. On Skylake it appears to fix the following three Piglit tests without causing any regressions: gs-float-array-variable-index gs-mat3x4-row-major gs-mat4x3-row-major I think we actually may need to do something similar for the fs backend and possibly for message headers from regular texture sampling but I'm not entirely sure. v2: Make sure the exec-size is retained as 8 for the mov instruction to initialise the header from g0. This was accidentally lost during a rebase on top of 07c571a39fa1. Split the patch into two so that the helper function is a separate change. Fix emitting the MOV instruction on Gen7. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89058 Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++ src/mesa/drivers/dri/i965/brw_vec4.h | 2 + .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 52 ++++++++++---------- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 38 ++++++++++---- 6 files changed, 63 insertions(+), 35 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index da6ed5b..a97a944 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -948,6 +948,7 @@ enum opcode { VS_OPCODE_URB_WRITE, VS_OPCODE_PULL_CONSTANT_LOAD, VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, VS_OPCODE_UNPACK_FLAGS_SIMD4X2, /** diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 335a800..0d6ac0c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -568,6 +568,10 @@ brw_instruction_name(enum opcode op) return "pull_constant_load"; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: return "pull_constant_load_gen7"; + + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: + return "set_simd4x2_header_gen9"; + case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: return "unpack_flags_simd4x2"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 0363924..a0ee2cc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -500,6 +500,8 @@ private: struct brw_reg dst, struct brw_reg surf_index, struct brw_reg offset); + void generate_set_simd4x2_header_gen9(vec4_instruction *inst, + struct brw_reg dst); void generate_unpack_flags(struct brw_reg dst); void generate_untyped_atomic(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 980e266..70d2af5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -44,6 +44,7 @@ can_do_writemask(const struct brw_context *brw, case SHADER_OPCODE_GEN4_SCRATCH_READ: case VS_OPCODE_PULL_CONSTANT_LOAD: case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: return false; default: /* The MATH instruction on Gen6 only executes in align1 mode, which does diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index e4addf7..b22a555 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1039,38 +1039,18 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, { assert(surf_index.type == BRW_REGISTER_TYPE_UD); - struct brw_reg src = offset; - bool header_present = false; - int mlen = 1; - - if (brw->gen >= 9) { - /* Skylake requires a message header in order to use SIMD4x2 mode. 
*/ - src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); - mlen = 2; - header_present = true; - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, vec8(src), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_set_default_access_mode(p, BRW_ALIGN_1); - - brw_MOV(p, get_element_ud(src, 2), - brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); - brw_pop_insn_state(p); - } - if (surf_index.file == BRW_IMMEDIATE_VALUE) { brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, insn, dst); - brw_set_src0(p, insn, src); + brw_set_src0(p, insn, offset); brw_set_sampler_message(p, insn, surf_index.dw1.ud, 0, /* LD message ignores sampler unit */ GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1, /* rlen */ - mlen, - header_present, + inst->mlen, + inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1095,14 +1075,14 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, /* dst = send(offset, a0.0 | ) */ brw_inst *insn = brw_send_indirect_message( - p, BRW_SFID_SAMPLER, dst, src, addr); + p, BRW_SFID_SAMPLER, dst, offset, addr); brw_set_sampler_message(p, insn, 0 /* surface */, 0 /* sampler */, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, 1 /* rlen */, - mlen /* mlen */, - header_present /* header */, + inst->mlen, + inst->header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); @@ -1113,6 +1093,22 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, } void +vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, + struct brw_reg dst) +{ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, vec8(dst), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_element_ud(dst, 2), + brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2)); + + brw_pop_insn_state(p); +} + +void vec4_generator::generate_untyped_atomic(vec4_instruction *inst, struct brw_reg dst, struct brw_reg 
atomic_op, @@ -1435,6 +1431,10 @@ vec4_generator::generate_code(const cfg_t *cfg) generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; + case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: + generate_set_simd4x2_header_gen9(inst, dst); + break; + case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f7d542b..3d16caa 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1313,16 +1313,36 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, vec4_instruction *pull; - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + if (brw->gen >= 9) { + /* Gen9+ needs a message header in order to use SIMD4x2 mode */ + src_reg header(this, glsl_type::uvec4_type, 2); - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. - */ - if (brw->gen >= 9) { - grf_offset.reg_offset++; - alloc.sizes[grf_offset.reg] = 2; - } + pull = new(mem_ctx) + vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + dst_reg(header)); + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); + + dst_reg index_reg = retype(offset(dst_reg(header), 1), + offset_reg.type); + pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg); + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); + + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst, + surf_index, + header); + pull->mlen = 2; + pull->header_present = true; + } else if (brw->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); grf_offset.type = offset_reg.type; From nroberts at kemper.freedesktop.org Thu Apr 16 12:09:17 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Thu, 16 Apr 2015 05:09:17 -0700 (PDT) Subject: Mesa (master): i965/vec4: Add a helper function 
to emit VS_OPCODE_PULL_CONSTANT_LOAD Message-ID: <20150416120917.5F97476250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a9e4cf5d323dbf11e42deda389ed03db571a7df7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9e4cf5d323dbf11e42deda389ed03db571a7df7 Author: Neil Roberts Date: Wed Apr 15 14:28:26 2015 +0100 i965/vec4: Add a helper function to emit VS_OPCODE_PULL_CONSTANT_LOAD There were three places in the visitor that had a similar chunk of code to emit the VS_OPCODE_PULL_CONSTANT_LOAD opcode using a register for the offset. This patch combines the chunks into a helper function to reduce the code duplication. It will also be useful in the next patch to expand what happens on Gen9+. This shouldn't introduce any functional changes. Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_vec4.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 120 +++++++++++++----------- src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 27 +----- 3 files changed, 75 insertions(+), 77 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 700ca69..0363924 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -364,6 +364,11 @@ public: dst_reg dst, src_reg orig_src, int base_offset); + void emit_pull_constant_load_reg(dst_reg dst, + src_reg surf_index, + src_reg offset, + bblock_t *before_block, + vec4_instruction *before_inst); src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, vec4_instruction *inst, src_reg src); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ffbe04d..f7d542b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1296,6 +1296,63 @@ vec4_visitor::emit_lrp(const dst_reg &dst, } } +/** + * Emits the instructions needed to perform a pull constant load. 
before_block + * and before_inst can be NULL in which case the instruction will be appended + * to the end of the instruction list. + */ +void +vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, + src_reg surf_index, + src_reg offset_reg, + bblock_t *before_block, + vec4_instruction *before_inst) +{ + assert((before_inst == NULL && before_block == NULL) || + (before_inst && before_block)); + + vec4_instruction *pull; + + if (brw->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); + + /* We have to use a message header on Skylake to get SIMD4x2 mode. + * Reserve space for the register. + */ + if (brw->gen >= 9) { + grf_offset.reg_offset++; + alloc.sizes[grf_offset.reg] = 2; + } + + grf_offset.type = offset_reg.type; + + pull = MOV(grf_offset, offset_reg); + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); + + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst, + surf_index, + src_reg(grf_offset)); + pull->mlen = 1; + } else { + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, + dst, + surf_index, + offset_reg); + pull->base_mrf = 14; + pull->mlen = 1; + } + + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); +} + void vec4_visitor::visit(ir_expression *ir) { @@ -1774,36 +1831,10 @@ vec4_visitor::visit(ir_expression *ir) emit(SHR(dst_reg(offset), op[1], src_reg(4))); } - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. 
- */ - if (brw->gen >= 9) { - grf_offset.reg_offset++; - alloc.sizes[grf_offset.reg] = 2; - } - - grf_offset.type = offset.type; - - emit(MOV(grf_offset, offset)); - - vec4_instruction *pull = - emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst_reg(packed_consts), - surf_index, - src_reg(grf_offset))); - pull->mlen = 1; - } else { - vec4_instruction *pull = - emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - dst_reg(packed_consts), - surf_index, - offset)); - pull->base_mrf = 14; - pull->mlen = 1; - } + emit_pull_constant_load_reg(dst_reg(packed_consts), + surf_index, + offset, + NULL, NULL /* before_block/inst */); packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements); packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, @@ -3475,32 +3506,11 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start); src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr, reg_offset); - vec4_instruction *load; - - if (brw->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. 
- */ - if (brw->gen >= 9) { - grf_offset.reg_offset++; - alloc.sizes[grf_offset.reg] = 2; - } - grf_offset.type = offset.type; - emit_before(block, inst, MOV(grf_offset, offset)); - - load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - temp, index, src_reg(grf_offset)); - load->mlen = 1; - } else { - load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - temp, index, offset); - load->base_mrf = 14; - load->mlen = 1; - } - emit_before(block, inst, load); + emit_pull_constant_load_reg(temp, + index, + offset, + block, inst); } /** diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp index c3b0233..8756bef 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp @@ -528,14 +528,6 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) /* Add the small constant index to the address register */ src_reg reladdr = src_reg(this, glsl_type::int_type); - /* We have to use a message header on Skylake to get SIMD4x2 mode. - * Reserve space for the register. 
- */ - if (brw->gen >= 9) { - reladdr.reg_offset++; - alloc.sizes[reladdr.reg] = 2; - } - dst_reg dst_reladdr = dst_reg(reladdr); dst_reladdr.writemask = WRITEMASK_X; emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index))); @@ -553,20 +545,11 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) result = src_reg(this, glsl_type::vec4_type); src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start)); - vec4_instruction *load; - if (brw->gen >= 7) { - load = new(mem_ctx) - vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst_reg(result), surf_index, reladdr); - load->mlen = 1; - } else { - load = new(mem_ctx) - vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - dst_reg(result), surf_index, reladdr); - load->base_mrf = 14; - load->mlen = 1; - } - emit(load); + + emit_pull_constant_load_reg(dst_reg(result), + surf_index, + reladdr, + NULL, NULL /* before_block/inst */); break; } From evelikov at kemper.freedesktop.org Thu Apr 16 13:12:17 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 16 Apr 2015 06:12:17 -0700 (PDT) Subject: Mesa (master): radeonsi: remove bogus r600-- triple Message-ID: <20150416131217.28B6476250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a7d018accfd0161510a75ba685e056256de494c2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a7d018accfd0161510a75ba685e056256de494c2 Author: Emil Velikov Date: Wed Apr 8 19:27:02 2015 +0100 radeonsi: remove bogus r600-- triple As mentioned by Michel Dänzer for LLVM >= 3.6 we create the LLVMTargetMachine (with triple amdgcn--), as we setup the radeonsi context.
For older LLVM or hardware (r600) the triple is always r600-- and is created at a later stage - radeon_llvm_compile() Signed-off-by: Emil Velikov Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index ae96b6b..5dc657c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -85,8 +85,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * LLVMTargetRef r600_target; #if HAVE_LLVM >= 0x0306 const char *triple = "amdgcn--"; -#else - const char *triple = "r600--"; #endif int shader, i; From krh at kemper.freedesktop.org Thu Apr 16 16:24:52 2015 From: krh at kemper.freedesktop.org (Kristian Høgsberg) Date: Thu, 16 Apr 2015 09:24:52 -0700 (PDT) Subject: Mesa (master): i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders Message-ID: <20150416162453.02577761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 993a6288f72fa98932df7cdb6f64d9dd645e670d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=993a6288f72fa98932df7cdb6f64d9dd645e670d Author: Kristian Høgsberg Date: Tue Apr 14 15:02:18 2015 +0000 i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders The ir_tex opcode turns into a sample or sample_c message, which will try to compute derivatives to determine the lod. This produces garbage for non-fragment shaders where the sample coordinates don't correspond to subspans. We fix this by rewriting the opcode from ir_tex to ir_txl and setting the lod to 0.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89457 Cc: "10.5" Signed-off-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0049b2d..4e99366 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1839,6 +1839,15 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, offset_value.file != BAD_FILE && offset_value.file != IMM; bool coordinate_done = false; + /* The sampler can only meaningfully compute LOD for fragment shader + * messages. For all other stages, we change the opcode to ir_txl and + * hardcode the LOD to 0. + */ + if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) { + op = ir_txl; + lod = fs_reg(0.0f); + } + /* Set up the LOD info */ switch (op) { case ir_tex: From mareko at kemper.freedesktop.org Thu Apr 16 16:36:44 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:44 -0700 (PDT) Subject: Mesa (master): st/mesa: add a debug option to compile shaders at link time Message-ID: <20150416163644.D6E22761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 99eef3b8b324d3be6f3b8f2a34c95006d8205599 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=99eef3b8b324d3be6f3b8f2a34c95006d8205599 Author: Marek Olšák Date: Fri Apr 10 23:58:34 2015 +0200 st/mesa: add a debug option to compile shaders at link time v2: fix crashes Tested-by: Tom Stellard Reviewed-by: Dave Airlie --- src/mesa/state_tracker/st_cb_program.c | 4 +++ src/mesa/state_tracker/st_debug.c | 1 + src/mesa/state_tracker/st_debug.h | 1 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +- src/mesa/state_tracker/st_program.c | 47 ++++++++++++++++++++++++++-- src/mesa/state_tracker/st_program.h | 3 ++ 6 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index aa301d8..c382d7d 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -41,6 +41,7 @@ #include "draw/draw_context.h" #include "st_context.h" +#include "st_debug.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_cb_program.h" @@ -214,6 +215,9 @@ st_program_string_notify( struct gl_context *ctx, st->dirty.st |= ST_NEW_VERTEX_PROGRAM; } + if (ST_DEBUG & DEBUG_PRECOMPILE) + st_precompile_shader_variant(st, prog); + /* XXX check if program is legal, within limits */ return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index de3e3a9..50891c1 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -56,6 +56,7 @@ static const struct debug_named_value st_debug_flags[] = { { "draw", DEBUG_DRAW, NULL }, { "buffer", DEBUG_BUFFER, NULL }, { "wf", DEBUG_WIREFRAME, NULL }, + { "precompile", DEBUG_PRECOMPILE, NULL }, DEBUG_NAMED_VALUE_END }; diff --git a/src/mesa/state_tracker/st_debug.h b/src/mesa/state_tracker/st_debug.h index cc81978..288eccf 100644 --- a/src/mesa/state_tracker/st_debug.h +++ b/src/mesa/state_tracker/st_debug.h @@ -47,6 +47,7 @@ st_print_current(void); #define DEBUG_DRAW 0x100 #define DEBUG_BUFFER 0x200 #define DEBUG_WIREFRAME 0x400 +#define DEBUG_PRECOMPILE 0x800 #ifdef DEBUG extern int ST_DEBUG; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 04258a1..111616d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5383,7 +5383,8 @@ st_translate_program( * program constant) has to happen before creating this linkage. 
*/ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (program->shader_program->_LinkedShaders[i] == NULL) + if (program->shader_program->_LinkedShaders[i] == NULL || + program->shader_program->_LinkedShaders[i]->Program == NULL) continue; _mesa_associate_uniform_storage(ctx, program->shader_program, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 3b0ac4a..d93b3c7 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -185,9 +185,6 @@ st_prepare_vertex_program(struct gl_context *ctx, if (stvp->Base.IsPositionInvariant) _mesa_insert_mvp_code(ctx, &stvp->Base); - if (!stvp->glsl_to_tgsi) - assert(stvp->Base.Base.NumInstructions > 1); - /* * Determine number of inputs, the mappings between VERT_ATTRIB_x * and TGSI generic input indexes, plus input attrib semantic info. @@ -1318,3 +1315,47 @@ st_print_current_vertex_program(void) } } } + + +/** + * Compile one shader variant. + */ +void +st_precompile_shader_variant(struct st_context *st, + struct gl_program *prog) +{ + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: { + struct st_vertex_program *p = (struct st_vertex_program *)prog; + struct st_vp_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st; + st_get_vp_variant(st, p, &key); + break; + } + + case GL_GEOMETRY_PROGRAM_NV: { + struct st_geometry_program *p = (struct st_geometry_program *)prog; + struct st_gp_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st; + st_get_gp_variant(st, p, &key); + break; + } + + case GL_FRAGMENT_PROGRAM_ARB: { + struct st_fragment_program *p = (struct st_fragment_program *)prog; + struct st_fp_variant_key key; + + memset(&key, 0, sizeof(key)); + key.st = st; + st_get_fp_variant(st, p, &key); + break; + } + + default: + assert(0); + } +} diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 451d7bb..b2c86fa 100644 --- a/src/mesa/state_tracker/st_program.h +++ 
b/src/mesa/state_tracker/st_program.h @@ -329,6 +329,9 @@ st_destroy_program_variants(struct st_context *st); extern void st_print_current_vertex_program(void); +extern void +st_precompile_shader_variant(struct st_context *st, + struct gl_program *prog); #ifdef __cplusplus } From mareko at kemper.freedesktop.org Thu Apr 16 16:36:44 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:44 -0700 (PDT) Subject: Mesa (master): radeonsi: add a debug option to compile shaders when they're created Message-ID: <20150416163644.E3C53761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b79c620663dc4eab1ad342a7961fa7aa16cff562 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b79c620663dc4eab1ad342a7961fa7aa16cff562 Author: Marek Olšák Date: Fri Apr 10 23:58:34 2015 +0200 radeonsi: add a debug option to compile shaders when they're created Tested-by: Tom Stellard --- src/gallium/drivers/radeon/r600_pipe_common.c | 1 + src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 0ef5fc2..2b27e0a 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -323,6 +323,7 @@ static const struct debug_named_value common_debug_options[] = { { "notiling", DBG_NO_TILING, "Disable tiling" }, { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, + { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation."
}, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a08d08c..febd2a1 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -83,6 +83,7 @@ #define DBG_NO_TILING (1 << 14) #define DBG_SWITCH_ON_EOP (1 << 15) #define DBG_FORCE_DMA (1 << 16) +#define DBG_PRECOMPILE (1 << 17) /* The maximum allowed bit is 20. */ #define R600_MAP_BUFFER_ALIGNMENT 64 diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b0a6fb9..1bbc6b3 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -465,6 +465,7 @@ static void *si_create_shader_state(struct pipe_context *ctx, const struct pipe_shader_state *state, unsigned pipe_shader_type) { + struct si_screen *sscreen = (struct si_screen *)ctx->screen; struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; @@ -494,6 +495,9 @@ static void *si_create_shader_state(struct pipe_context *ctx, } } + if (sscreen->b.debug_flags & DBG_PRECOMPILE) + si_shader_select(ctx, sel); + return sel; } From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: cleanup includes Message-ID: <20150416163645.09815761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d3045d391b0b06faf4fb6be0394ff64c415cf336 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d3045d391b0b06faf4fb6be0394ff64c415cf336 Author: Marek Olšák Date: Wed Mar 18 12:38:19 2015 +0100 glsl_to_tgsi: cleanup includes Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_atom_shader.c | 3 +-- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 25 +++++-------------------- src/mesa/state_tracker/st_glsl_to_tgsi.h | 7 ++++--- 3 files changed, 10
insertions(+), 25 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 73768ed..629f54f 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -40,9 +40,8 @@ #include "program/program.h" #include "pipe/p_context.h" - +#include "pipe/p_shader_tokens.h" #include "util/u_simple_shaders.h" - #include "cso_cache/cso_context.h" #include "st_context.h" diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 111616d..435c126 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -30,40 +30,25 @@ * Translate GLSL IR to TGSI. */ -#include -#include "main/compiler.h" -#include "ir.h" -#include "ir_visitor.h" -#include "ir_expression_flattening.h" -#include "glsl_types.h" +#include "st_glsl_to_tgsi.h" + #include "glsl_parser_extras.h" -#include "../glsl/program.h" #include "ir_optimization.h" -#include "ast.h" -#include "main/mtypes.h" +#include "main/errors.h" #include "main/shaderobj.h" #include "main/uniforms.h" #include "main/shaderapi.h" -#include "program/hash_table.h" #include "program/prog_instruction.h" -#include "program/prog_optimize.h" -#include "program/prog_print.h" -#include "program/program.h" -#include "program/prog_parameter.h" #include "program/sampler.h" -#include "pipe/p_compiler.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" -#include "util/u_math.h" #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_info.h" -#include "st_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "st_program.h" -#include "st_glsl_to_tgsi.h" #include "st_mesa_to_tgsi.h" diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h index 5ed6407..2cb80bc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h +++ 
b/src/mesa/state_tracker/st_glsl_to_tgsi.h @@ -22,17 +22,18 @@ * DEALINGS IN THE SOFTWARE. */ +#include "pipe/p_defines.h" +#include "main/mtypes.h" + #ifdef __cplusplus extern "C" { #endif -#include "main/glheader.h" -#include "tgsi/tgsi_ureg.h" - struct gl_context; struct gl_shader; struct gl_shader_program; struct glsl_to_tgsi_visitor; +struct ureg_program; enum pipe_error st_translate_program( struct gl_context *ctx, From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms Message-ID: <20150416163645.17B53761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 14c5bc3b9a6b03a8e42ef79da66d8b81b239cf96 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=14c5bc3b9a6b03a8e42ef79da66d8b81b239cf96 Author: Marek Olšák Date: Sat Apr 11 13:49:38 2015 +0200 glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms This fixes piglit shaders at glsl-fs-uniform-array-loop-unroll with immediate shader compilation - it's a compiler test, so it has never been translated to TGSI before.
Cc: 10.4 10.5 Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 435c126..b732c0b 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4347,6 +4347,7 @@ struct st_translate { struct ureg_dst arrays[MAX_ARRAYS]; struct ureg_src *constants; + int num_constants; struct ureg_src *immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; @@ -4557,15 +4558,15 @@ src_register(struct st_translate *t, const st_src_reg *reg) case PROGRAM_UNIFORM: assert(reg->index >= 0); - return t->constants[reg->index]; + return reg->index < t->num_constants ? + t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ if (reg->has_index2) return ureg_src_register(TGSI_FILE_CONSTANT, reg->index); - else if (reg->index < 0) - return ureg_DECL_constant(t->ureg, 0); else - return t->constants[reg->index]; + return reg->index >= 0 && reg->index < t->num_constants ? 
+ t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); case PROGRAM_IMMEDIATE: return t->immediates[reg->index]; @@ -5283,6 +5284,7 @@ st_translate_program( ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } + t->num_constants = proginfo->Parameters->NumParameters; for (i = 0; i < proginfo->Parameters->NumParameters; i++) { switch (proginfo->Parameters->Parameters[i].Type) { @@ -5383,6 +5385,7 @@ out: free(t->insn); free(t->labels); free(t->constants); + t->num_constants = 0; free(t->immediates); if (t->error) { From mareko at kemper.freedesktop.org Thu Apr 16 16:36:44 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:44 -0700 (PDT) Subject: Mesa (master): mesa/program: remove dead code Message-ID: <20150416163644.EF53C761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 76c2d4498ddfcf127eecd2045d188a59b47b731b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76c2d4498ddfcf127eecd2045d188a59b47b731b Author: Marek Olšák Date: Sat Apr 11 19:35:40 2015 +0200 mesa/program: remove dead code Reviewed-by: Matt Turner --- src/mesa/program/prog_parameter.c | 72 ------------------------------------- src/mesa/program/prog_parameter.h | 9 ----- 2 files changed, 81 deletions(-) diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index cdfe251..53e9813 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -190,40 +190,6 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, /** - * Add a new named constant to the parameter list. 
- * This will be used when the program contains something like this: - * PARAM myVals = { 0, 1, 2, 3 }; - * - * \param paramList the parameter list - * \param name the name for the constant - * \param values four float values - * \return index/position of the new parameter in the parameter list - */ -GLint -_mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const gl_constant_value values[4], - GLuint size) -{ - /* first check if this is a duplicate constant */ - GLint pos; - for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) { - const gl_constant_value *pvals = paramList->ParameterValues[pos]; - if (pvals[0].u == values[0].u && - pvals[1].u == values[1].u && - pvals[2].u == values[2].u && - pvals[3].u == values[3].u && - strcmp(paramList->Parameters[pos].Name, name) == 0) { - /* Same name and value is already in the param list - reuse it */ - return pos; - } - } - /* not found, add new parameter */ - return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name, - size, GL_NONE, values, NULL); -} - - -/** * Add a new unnamed constant to the parameter list. This will be used * when a fragment/vertex program contains something like this: * MOV r, { 0, 1, 2, 3 }; @@ -303,28 +269,6 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList, swizzleOut); } -#if 0 /* not used yet */ -/** - * Returns the number of 4-component registers needed to store a piece - * of GL state. For matrices this may be as many as 4 registers, - * everything else needs - * just 1 register. - */ -static GLuint -sizeof_state_reference(const GLint *stateTokens) -{ - if (stateTokens[0] == STATE_MATRIX) { - GLuint rows = stateTokens[4] - stateTokens[3] + 1; - assert(rows >= 1); - assert(rows <= 4); - return rows; - } - else { - return 1; - } -} -#endif - /** * Add a new state reference to the parameter list. 
@@ -365,22 +309,6 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, /** - * Lookup a parameter value by name in the given parameter list. - * \return pointer to the float[4] values. - */ -gl_constant_value * -_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, - GLsizei nameLen, const char *name) -{ - GLint i = _mesa_lookup_parameter_index(paramList, nameLen, name); - if (i < 0) - return NULL; - else - return paramList->ParameterValues[i]; -} - - -/** * Given a program parameter name, find its position in the list of parameters. * \param paramList the parameter list to search * \param nameLen length of name (in chars). diff --git a/src/mesa/program/prog_parameter.h b/src/mesa/program/prog_parameter.h index 6b3b3c2..74a5fd9 100644 --- a/src/mesa/program/prog_parameter.h +++ b/src/mesa/program/prog_parameter.h @@ -120,11 +120,6 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, const gl_state_index state[STATE_LENGTH]); extern GLint -_mesa_add_named_constant(struct gl_program_parameter_list *paramList, - const char *name, const gl_constant_value values[4], - GLuint size); - -extern GLint _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList, const gl_constant_value values[4], GLuint size, GLenum datatype, GLuint *swizzleOut); @@ -138,10 +133,6 @@ extern GLint _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]); -extern gl_constant_value * -_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList, - GLsizei nameLen, const char *name); - extern GLint _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList, GLsizei nameLen, const char *name); From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: don' t use a 
potentially-undefined immediate for ir_query_levels Message-ID: <20150416163645.252C9761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dcc74d47c40bf117f2dfaa359f9de7faef2c2200 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dcc74d47c40bf117f2dfaa359f9de7faef2c2200 Author: Marek Olšák Date: Sat Apr 11 14:55:26 2015 +0200 glsl_to_tgsi: don't use a potentially-undefined immediate for ir_query_levels Cc: 10.4 10.5 Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b732c0b..5344ff9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3015,7 +3015,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) break; case ir_query_levels: opcode = TGSI_OPCODE_TXQ; - lod_info = st_src_reg(PROGRAM_IMMEDIATE, 0, GLSL_TYPE_INT); + lod_info = undef_src; levels_src = get_temp(ir->type); break; case ir_txf: @@ -4550,7 +4550,7 @@ src_register(struct st_translate *t, const st_src_reg *reg) { switch(reg->file) { case PROGRAM_UNDEFINED: - return ureg_src_undef(); + return ureg_imm4f(t->ureg, 0, 0, 0, 0); case PROGRAM_TEMPORARY: case PROGRAM_ARRAY: @@ -4751,10 +4751,8 @@ compile_tgsi_instruction(struct st_translate *t, inst->saturate, clamp_dst_color_output); - for (i = 0; i < num_src; i++) { - assert(inst->src[i].file != PROGRAM_UNDEFINED); + for (i = 0; i < num_src; i++) src[i] = translate_src(t, &inst->src[i]); - } switch(inst->op) { case TGSI_OPCODE_BGNLOOP: From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: add STATE_FB_WPOS_Y_TRANSFORM at link time Message-ID: <20150416163645.4119D761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bb5df7350b9ac780389bce9c7642cb88681a4b2d 
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bb5df7350b9ac780389bce9c7642cb88681a4b2d Author: Marek Ol??k Date: Sat Apr 11 20:01:22 2015 +0200 glsl_to_tgsi: add STATE_FB_WPOS_Y_TRANSFORM at link time This will allow removing the uniform storage re-association during TGSI generation at draw time. Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 44 +++++++++++++++------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index a38f1b6..9401dcb 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -313,6 +313,7 @@ public: int num_address_regs; int samplers_used; bool indirect_addr_consts; + int wpos_transform_const; int glsl_version; bool native_integers; @@ -3337,6 +3338,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() num_address_regs = 0; samplers_used = 0; indirect_addr_consts = false; + wpos_transform_const = -1; glsl_version = 0; native_integers = false; mem_ctx = ralloc_context(NULL); @@ -4824,28 +4826,19 @@ compile_tgsi_instruction(struct st_translate *t, */ static void emit_wpos_adjustment( struct st_translate *t, - const struct gl_program *program, + int wpos_transform_const, boolean invert, GLfloat adjX, GLfloat adjY[2]) { struct ureg_program *ureg = t->ureg; + assert(wpos_transform_const >= 0); + /* Fragment program uses fragment position input. * Need to replace instances of INPUT[WPOS] with temp T - * where T = INPUT[WPOS] by y is inverted. - */ - static const gl_state_index wposTransformState[STATE_LENGTH] - = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, - (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; - - /* XXX: note we are modifying the incoming shader here! Need to - * do this before emitting the constant decls below, or this - * will be missed: + * where T = INPUT[WPOS] is inverted by Y. 
*/ - unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, - wposTransformState); - - struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); + struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const); struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]]; @@ -4909,7 +4902,8 @@ static void emit_wpos(struct st_context *st, struct st_translate *t, const struct gl_program *program, - struct ureg_program *ureg) + struct ureg_program *ureg, + int wpos_transform_const) { const struct gl_fragment_program *fp = (const struct gl_fragment_program *) program; @@ -5006,7 +5000,7 @@ emit_wpos(struct st_context *st, /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ - emit_wpos_adjustment(t, program, invert, adjX, adjY); + emit_wpos_adjustment(t, wpos_transform_const, invert, adjX, adjY); } /** @@ -5145,10 +5139,9 @@ st_translate_program( } if (proginfo->InputsRead & VARYING_BIT_POS) { - /* Must do this after setting up t->inputs, and before - * emitting constant references, below: - */ - emit_wpos(st_context(ctx), t, proginfo, ureg); + /* Must do this after setting up t->inputs. */ + emit_wpos(st_context(ctx), t, proginfo, ureg, + program->wpos_transform_const); } if (proginfo->InputsRead & VARYING_BIT_FACE) @@ -5539,6 +5532,17 @@ get_mesa_program(struct gl_context *ctx, do_set_program_inouts(shader->ir, prog, shader->Stage); count_resources(v, prog); + /* This must be done before the uniform storage is associated. 
*/ + if (shader->Type == GL_FRAGMENT_SHADER && + prog->InputsRead & VARYING_BIT_POS){ + static const gl_state_index wposTransformState[STATE_LENGTH] = { + STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM + }; + + v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters, + wposTransformState); + } + _mesa_reference_program(ctx, &shader->Program, prog); /* This has to be done last. Any operation the can cause From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: only associate the uniform storage once at link time Message-ID: <20150416163645.4F593761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0d46440c3a31ee9bd35bb48de419332c65761205 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0d46440c3a31ee9bd35bb48de419332c65761205 Author: Marek Olšák Date: Sat Apr 11 20:05:41 2015 +0200 glsl_to_tgsi: only associate the uniform storage once at link time This hack is no longer needed. (see the previous commit) Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 9401dcb..fa390c9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5117,15 +5117,6 @@ st_translate_program( t->outputMapping = outputMapping; t->ureg = ureg; - if (program->shader_program) { - for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) { - struct gl_uniform_storage *const storage = - &program->shader_program->UniformStorage[i]; - - _mesa_uniform_detach_all_driver_storage(storage); - } - } - /* * Declare input attributes. */ @@ -5360,21 +5351,6 @@ st_translate_program( t->insn[t->labels[i].branch_target]); } - if (program->shader_program) { - /* This has to be done last. 
Any operation the can cause - * prog->ParameterValues to get reallocated (e.g., anything that adds a - * program constant) has to happen before creating this linkage. - */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (program->shader_program->_LinkedShaders[i] == NULL || - program->shader_program->_LinkedShaders[i]->Program == NULL) - continue; - - _mesa_associate_uniform_storage(ctx, program->shader_program, - program->shader_program->_LinkedShaders[i]->Program->Parameters); - } - } - out: if (t) { free(t->temps); From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): configure.ac: print LLVM_LDFLAGS Message-ID: <20150416163645.5D7B1761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 61293bfcedcab450e1a6f3301152bda5f6e348ed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61293bfcedcab450e1a6f3301152bda5f6e348ed Author: Marek Olšák Date: Mon Apr 13 21:44:02 2015 +0200 configure.ac: print LLVM_LDFLAGS Reviewed-by: Brian Paul Reviewed-by: Emil Velikov --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 9e8c1d8..6ccf3b4 100644 --- a/configure.ac +++ b/configure.ac @@ -2543,6 +2543,7 @@ if test "x$MESA_LLVM" = x1; then echo " LLVM_CFLAGS: $LLVM_CFLAGS" echo " LLVM_CXXFLAGS: $LLVM_CXXFLAGS" echo " LLVM_CPPFLAGS: $LLVM_CPPFLAGS" + echo " LLVM_LDFLAGS: $LLVM_LDFLAGS" echo "" fi echo " PYTHON2: $PYTHON2" From mareko at kemper.freedesktop.org Thu Apr 16 16:36:45 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 16 Apr 2015 09:36:45 -0700 (PDT) Subject: Mesa (master): glsl_to_tgsi: add assertions for detecting out-of-bounds immediates access Message-ID: <20150416163645.33D1F761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e2066a4344e05ab66442e5de5a3caa19f7809323 URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e2066a4344e05ab66442e5de5a3caa19f7809323 Author: Marek Olšák Date: Sat Apr 11 14:40:09 2015 +0200 glsl_to_tgsi: add assertions for detecting out-of-bounds immediates access Reviewed-by: Brian Paul --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5344ff9..a38f1b6 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -4349,6 +4349,7 @@ struct st_translate { struct ureg_src *constants; int num_constants; struct ureg_src *immediates; + int num_immediates; struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; struct ureg_dst address[3]; @@ -4569,6 +4570,7 @@ src_register(struct st_translate *t, const st_src_reg *reg) t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); case PROGRAM_IMMEDIATE: + assert(reg->index >= 0 && reg->index < t->num_immediates); return t->immediates[reg->index]; case PROGRAM_INPUT: @@ -4689,6 +4691,7 @@ translate_tex_offset(struct st_translate *t, switch (in_offset->file) { case PROGRAM_IMMEDIATE: + assert(in_offset->index >= 0 && in_offset->index < t->num_immediates); imm_src = t->immediates[in_offset->index]; offset.File = imm_src.File; @@ -5334,6 +5337,8 @@ st_translate_program( ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } + t->num_immediates = program->num_immediates; + i = 0; foreach_in_list(immediate_storage, imm, &program->immediates) { assert(i < program->num_immediates); @@ -5385,6 +5390,7 @@ out: free(t->constants); t->num_constants = 0; free(t->immediates); + t->num_immediates = 0; if (t->error) { debug_printf("%s: translate error flag set\n", __func__); From idr at kemper.freedesktop.org Thu Apr 16 16:57:05 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Thu, 16 Apr 2015 09:57:05 -0700 (PDT) Subject: Mesa (master): nir: 
Convert the if-test for num_inputs == 2 to an assertion Message-ID: <20150416165705.47AB9761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 94aab6cde696ab1cd1243f5c62444166efb1a2fa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=94aab6cde696ab1cd1243f5c62444166efb1a2fa Author: Ian Romanick Date: Wed Apr 15 15:20:57 2015 -0700 nir: Convert the if-test for num_inputs == 2 to an assertion Suggested by Jason on a different patch after some comments / questions by Ilia. Signed-off-by: Ian Romanick Reviewed-by: Jason Ekstrand Reviewed-by: Connor Abbott --- src/glsl/nir/nir_search.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c index 73a802b..5ba0160 100644 --- a/src/glsl/nir/nir_search.c +++ b/src/glsl/nir/nir_search.c @@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, if (matched) return true; - if (nir_op_infos[instr->op].num_inputs == 2 && - (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) { + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); if (!match_value(expr->srcs[0], instr, 1, num_components, swizzle, state)) return false; From vsyrjala at kemper.freedesktop.org Thu Apr 16 18:33:57 2015 From: vsyrjala at kemper.freedesktop.org (Ville Syrjala) Date: Thu, 16 Apr 2015 11:33:57 -0700 (PDT) Subject: Mesa (master): i965: Add marketing names for CHV Message-ID: <20150416183358.01E6E761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4fc645aed10f3470bba2237a7e7314c3e2b3c25b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4fc645aed10f3470bba2237a7e7314c3e2b3c25b Author: Ville Syrjälä Date: Thu Apr 16 19:21:07 2015 +0300 i965: Add marketing names for CHV All CHV devices will be branded as "Intel(r) HD Graphics". Reviewed-by: Kenneth Graunke Signed-off-by: Ville Syrjälä 
--- include/pci_ids/i965_pci_ids.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h index 3e3e8fe..8d757aa 100644 --- a/include/pci_ids/i965_pci_ids.h +++ b/include/pci_ids/i965_pci_ids.h @@ -124,7 +124,7 @@ CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F") CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3") CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3") CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3") -CHIPSET(0x22B0, chv, "Intel(R) Cherryview") -CHIPSET(0x22B1, chv, "Intel(R) Cherryview") -CHIPSET(0x22B2, chv, "Intel(R) Cherryview") -CHIPSET(0x22B3, chv, "Intel(R) Cherryview") +CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)") +CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)") +CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)") +CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)") From jrfonseca at kemper.freedesktop.org Thu Apr 16 19:42:43 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Thu, 16 Apr 2015 12:42:43 -0700 (PDT) Subject: Mesa (master): libgl-gdi: Prevent "pure virtual method called" error when. Message-ID: <20150416194243.5FDA6761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8638e3ae1b33a887f24a6e50bdb722361ee9414c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8638e3ae1b33a887f24a6e50bdb722361ee9414c Author: Jose Fonseca Date: Thu Apr 16 13:08:56 2015 +0100 libgl-gdi: Prevent "pure virtual method called" error when. When running piglit w/ llvmpipe on Windows several tests terminate abnormally just when the test exits. The problem was that LLVMContextDispose was being called after LLVM global destructors. 
Reviewed-by: Roland Scheidegger --- src/gallium/targets/libgl-gdi/libgl_gdi.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/targets/libgl-gdi/libgl_gdi.c b/src/gallium/targets/libgl-gdi/libgl_gdi.c index a2fb161..922c186 100644 --- a/src/gallium/targets/libgl-gdi/libgl_gdi.c +++ b/src/gallium/targets/libgl-gdi/libgl_gdi.c @@ -38,6 +38,7 @@ #include "util/u_debug.h" #include "stw_winsys.h" +#include "stw_device.h" #include "gdi/gdi_sw_winsys.h" #include "softpipe/sp_texture.h" @@ -143,8 +144,12 @@ static const struct stw_winsys stw_winsys = { }; +EXTERN_C BOOL WINAPI +DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved); + + BOOL WINAPI -DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) +DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { switch (fdwReason) { case DLL_PROCESS_ATTACH: @@ -161,9 +166,22 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) break; case DLL_PROCESS_DETACH: - if (lpReserved == NULL) { + if (lpvReserved == NULL) { + // We're being unloaded from the process. stw_cleanup_thread(); stw_cleanup(); + } else { + // Process itself is terminating, and all threads and modules are + // being detached. + // + // The order threads (including llvmpipe rasterizer threads) are + // destroyed can not be relied up, so it's not safe to cleanup. + // + // However global destructors (e.g., LLVM's) will still be called, and + // if Microsoft OPENGL32.DLL's DllMain is called after us, it will + // still try to invoke DrvDeleteContext to destroys all outstanding, + // so set stw_dev to NULL to return immediately if that happens. + stw_dev = NULL; } break; } From jrfonseca at kemper.freedesktop.org Fri Apr 17 14:16:11 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Fri, 17 Apr 2015 07:16:11 -0700 (PDT) Subject: Mesa (master): mesa/st: Free st_translate with FREE macro. 
Message-ID: <20150417141611.09A7B7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e050a19af895318aaf417919c6a8ecba2da53304 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e050a19af895318aaf417919c6a8ecba2da53304 Author: Brian Paul Date: Fri Apr 17 15:14:23 2015 +0100 mesa/st: Free st_translate with FREE macro. To match CALLOC_STRUCT macro. Fixes memory corruption on Windows when u_memory's memory debugging is enabled. Reviewed-by: Jose Fonseca --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index fa390c9..93671ba 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5365,7 +5365,7 @@ out: debug_printf("%s: translate error flag set\n", __func__); } - free(t); + FREE(t); } return ret; From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add support for texture offsets Message-ID: <20150417155246.848587626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f44d836d7a26a7d7c4374cc9e1e8bdc96cd2db8e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f44d836d7a26a7d7c4374cc9e1e8bdc96cd2db8e Author: Rob Clark Date: Sat Apr 11 11:35:29 2015 -0400 gallium/ttn: add support for texture offsets Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 9d988b06..7312e54 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -982,7 +982,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) struct 
tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; nir_tex_instr *instr; nir_texop op; - unsigned num_srcs, samp = 1; + unsigned num_srcs, samp = 1, i; switch (tgsi_inst->Instruction.Opcode) { case TGSI_OPCODE_TEX: @@ -1026,6 +1026,8 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) num_srcs++; } + num_srcs += tgsi_inst->Texture.NumOffsets; + instr = nir_tex_instr_create(b->shader, num_srcs); instr->op = op; @@ -1103,6 +1105,31 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) src_number++; } + for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) { + struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i]; + /* since TexOffset ins't using tgsi_full_src_register we get to + * do some extra gymnastics: + */ + nir_alu_src src; + + memset(&src, 0, sizeof(src)); + + src.src = ttn_src_for_file_and_index(c, + tex_offset->File, + tex_offset->Index, + NULL); + + src.swizzle[0] = tex_offset->SwizzleX; + src.swizzle[1] = tex_offset->SwizzleY; + src.swizzle[2] = tex_offset->SwizzleZ; + src.swizzle[3] = TGSI_SWIZZLE_W; + + instr->src[src_number].src_type = nir_tex_src_offset; + instr->src[src_number].src = nir_src_for_ssa( + nir_fmov_alu(b, src, nir_tex_instr_src_size(instr, src_number))); + src_number++; + } + assert(src_number == num_srcs); nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): gallium/ttn: minor cleanup Message-ID: <20150417155246.959AD7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8efe20467b2710cc502cbfec19c1e543e572f864 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8efe20467b2710cc502cbfec19c1e543e572f864 Author: Rob Clark Date: Sat Apr 11 15:25:58 2015 -0400 gallium/ttn: minor cleanup v2: also use ttn_src_for_indirect() everywhere for addr access, rather than open-coding it for 
INPUT/CONST srcs v3: move ralloc out of ttn_src_for_indirect() into the one call site that needs a ptr Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 40 +++++++++++++------------------ 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 42671c8..b6123e0 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -52,7 +52,6 @@ struct ttn_reg_info { struct ttn_compile { union tgsi_full_token *token; nir_builder build; - struct nir_shader *s; struct tgsi_shader_info *scan; struct ttn_reg_info *output_regs; @@ -256,7 +255,7 @@ ttn_emit_immediate(struct ttn_compile *c) nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr); } -static nir_src * +static nir_src ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect); /* generate either a constant or indirect deref chain for accessing an @@ -275,7 +274,7 @@ ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr, if (indirect) { arr->deref_array_type = nir_deref_array_type_indirect; - arr->indirect = *ttn_src_for_indirect(c, indirect); + arr->indirect = ttn_src_for_indirect(c, indirect); } else { arr->deref_array_type = nir_deref_array_type_direct; } @@ -361,35 +360,29 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: { nir_intrinsic_instr *load; + nir_intrinsic_op op; switch (file) { case TGSI_FILE_INPUT: - load = nir_intrinsic_instr_create(b->shader, - indirect ? - nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input); + op = indirect ? nir_intrinsic_load_input_indirect : + nir_intrinsic_load_input; break; case TGSI_FILE_CONSTANT: - load = nir_intrinsic_instr_create(b->shader, - indirect ? - nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform); + op = indirect ? 
nir_intrinsic_load_uniform_indirect : + nir_intrinsic_load_uniform; break; default: unreachable("No other load files supported"); break; } + load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = 4; load->const_index[0] = index; load->const_index[1] = 1; if (indirect) { - nir_alu_src indirect_address; - memset(&indirect_address, 0, sizeof(indirect_address)); - indirect_address.src = nir_src_for_reg(c->addr_reg); - for (int i = 0; i < 4; i++) - indirect_address.swizzle[i] = indirect->Swizzle; - load->src[0] = nir_src_for_ssa(nir_imov_alu(b, indirect_address, 1)); + load->src[0] = ttn_src_for_indirect(c, indirect); } nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -406,7 +399,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, return src; } -static nir_src * +static nir_src ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) { nir_builder *b = &c->build; @@ -417,9 +410,7 @@ ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) src.src = ttn_src_for_file_and_index(c, indirect->File, indirect->Index, NULL); - nir_src *result = ralloc(b->shader, nir_src); - *result = nir_src_for_ssa(nir_imov_alu(b, src, 1)); - return result; + return nir_src_for_ssa(nir_imov_alu(b, src, 1)); } static nir_alu_dest @@ -486,8 +477,11 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) dest.write_mask = tgsi_dst->WriteMask; dest.saturate = false; - if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) - dest.dest.reg.indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); + if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) { + nir_src *indirect = ralloc(c->build.shader, nir_src); + *indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); + dest.dest.reg.indirect = indirect; + } return dest; } From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 
2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add UBO support Message-ID: <20150417155246.9F1D97626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ef7c4f39bf26998f24999f03b313d2665e842d31 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef7c4f39bf26998f24999f03b313d2665e842d31 Author: Rob Clark Date: Sun Apr 12 08:15:33 2015 -0400 gallium/ttn: add UBO support v2: move ishl into ttn (instead of driver backend) to keep the units consistent between immediate and indirect offsets Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 69 +++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index b6123e0..6c8d3fc 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -162,6 +162,10 @@ ttn_emit_declaration(struct ttn_compile *c) file == TGSI_FILE_OUTPUT || file == TGSI_FILE_CONSTANT); + /* nothing to do for UBOs: */ + if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension) + return; + var = rzalloc(b->shader, nir_variable); var->data.driver_location = decl->Range.First; @@ -286,7 +290,9 @@ ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr, static nir_src ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, - struct tgsi_ind_register *indirect) + struct tgsi_ind_register *indirect, + struct tgsi_dimension *dim, + struct tgsi_ind_register *dimind) { nir_builder *b = &c->build; nir_src src; @@ -314,15 +320,18 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, assert(!indirect); src.reg.reg = c->temp_regs[index].reg; } + assert(!dim); break; case TGSI_FILE_ADDRESS: src.reg.reg = c->addr_reg; + assert(!dim); break; case TGSI_FILE_IMMEDIATE: src = 
nir_src_for_ssa(c->imm_defs[index]); assert(!indirect); + assert(!dim); break; case TGSI_FILE_SYSTEM_VALUE: { @@ -330,6 +339,9 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, nir_intrinsic_op op; unsigned ncomp = 1; + assert(!indirect); + assert(!dim); + switch (c->scan->system_value_semantic_name[index]) { case TGSI_SEMANTIC_VERTEXID_NOBASE: op = nir_intrinsic_load_vertex_id_zero_base; @@ -361,15 +373,24 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, case TGSI_FILE_CONSTANT: { nir_intrinsic_instr *load; nir_intrinsic_op op; + unsigned srcn = 0; switch (file) { case TGSI_FILE_INPUT: op = indirect ? nir_intrinsic_load_input_indirect : nir_intrinsic_load_input; + assert(!dim); break; case TGSI_FILE_CONSTANT: - op = indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + if (dim) { + op = indirect ? nir_intrinsic_load_ubo_indirect : + nir_intrinsic_load_ubo; + /* convert index from vec4 to byte: */ + index *= 16; + } else { + op = indirect ? 
nir_intrinsic_load_uniform_indirect : + nir_intrinsic_load_uniform; + } break; default: unreachable("No other load files supported"); @@ -381,8 +402,28 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->num_components = 4; load->const_index[0] = index; load->const_index[1] = 1; + if (dim) { + if (dimind) { + load->src[srcn] = + ttn_src_for_file_and_index(c, dimind->File, dimind->Index, + NULL, NULL, NULL); + } else { + /* UBOs start at index 1 in TGSI: */ + load->src[srcn] = + nir_src_for_ssa(nir_imm_int(b, dim->Index - 1)); + } + srcn++; + } if (indirect) { - load->src[0] = ttn_src_for_indirect(c, indirect); + load->src[srcn] = ttn_src_for_indirect(c, indirect); + if (dim) { + assert(load->src[srcn].is_ssa); + /* we also need to convert vec4 to byte here too: */ + load->src[srcn] = + nir_src_for_ssa(nir_ishl(b, load->src[srcn].ssa, + nir_imm_int(b, 4))); + } + srcn++; } nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -409,7 +450,8 @@ ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) src.swizzle[i] = indirect->Swizzle; src.src = ttn_src_for_file_and_index(c, indirect->File, - indirect->Index, NULL); + indirect->Index, + NULL, NULL, NULL); return nir_src_for_ssa(nir_imov_alu(b, src, 1)); } @@ -524,11 +566,20 @@ ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) assert(!tgsi_src->Indirect); return NULL; } else { + struct tgsi_ind_register *ind = NULL; + struct tgsi_dimension *dim = NULL; + struct tgsi_ind_register *dimind = NULL; + if (tgsi_src->Indirect) + ind = &tgsi_fsrc->Indirect; + if (tgsi_src->Dimension) { + dim = &tgsi_fsrc->Dimension; + if (dim->Indirect) + dimind = &tgsi_fsrc->DimIndirect; + } src.src = ttn_src_for_file_and_index(c, tgsi_src->File, tgsi_src->Index, - (tgsi_src->Indirect ? 
- &tgsi_fsrc->Indirect : NULL)); + ind, dim, dimind); } src.swizzle[0] = tgsi_src->SwizzleX; @@ -1122,7 +1173,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) src.src = ttn_src_for_file_and_index(c, tex_offset->File, tex_offset->Index, - NULL); + NULL, NULL, NULL); src.swizzle[0] = tex_offset->SwizzleX; src.swizzle[1] = tex_offset->SwizzleY; @@ -1646,7 +1697,7 @@ tgsi_to_nir(const void *tgsi_tokens, c->scan = &scan; s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1; - s->num_uniforms = scan.file_max[TGSI_FILE_CONSTANT] + 1; + s->num_uniforms = scan.const_file_max[0] + 1; s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; c->output_regs = rzalloc_array(c, struct ttn_reg_info, From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): nir/builder: add nir_builder_insert_after_instr() Message-ID: <20150417155246.B2E227626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e14af4c0672c974238bed4661ada383cf50501f6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e14af4c0672c974238bed4661ada383cf50501f6 Author: Rob Clark Date: Wed Apr 15 15:49:15 2015 -0400 nir/builder: add nir_builder_insert_after_instr() For lowering if/else, I need a way to insert at the end of the previous block. 
Signed-off-by: Rob Clark --- src/glsl/nir/nir_builder.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 587d014..d1419ee 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -29,6 +29,7 @@ struct exec_list; typedef struct nir_builder { struct exec_list *cf_node_list; nir_instr *before_instr; + nir_instr *after_instr; nir_shader *shader; nir_function_impl *impl; @@ -47,12 +48,24 @@ nir_builder_insert_after_cf_list(nir_builder *build, struct exec_list *cf_node_list) { build->cf_node_list = cf_node_list; + build->before_instr = NULL; + build->after_instr = NULL; } static inline void nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr) { + build->cf_node_list = NULL; build->before_instr = before_instr; + build->after_instr = NULL; +} + +static inline void +nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr) +{ + build->cf_node_list = NULL; + build->before_instr = NULL; + build->after_instr = after_instr; } static inline void @@ -60,9 +73,12 @@ nir_builder_instr_insert(nir_builder *build, nir_instr *instr) { if (build->cf_node_list) { nir_instr_insert_after_cf_list(build->cf_node_list, instr); - } else { - assert(build->before_instr); + } else if (build->before_instr) { nir_instr_insert_before(build->before_instr, instr); + } else { + assert(build->after_instr); + nir_instr_insert_after(build->after_instr, instr); + build->after_instr = instr; } } From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): gallium/ttn: fix TXF Message-ID: <20150417155246.A75E67626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7a9063e7c735960d10f489314aa7623a3d262085 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a9063e7c735960d10f489314aa7623a3d262085 Author: Rob Clark 
Date: Thu Apr 16 15:16:12 2015 -0400 gallium/ttn: fix TXF There is a level param stashed away in the .w component of the first src. Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 6c8d3fc..59aaf67 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1053,7 +1053,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) break; case TGSI_OPCODE_TXF: op = nir_texop_txf; - num_srcs = 1; + num_srcs = 2; break; case TGSI_OPCODE_TXD: op = nir_texop_txd; @@ -1138,6 +1138,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) src_number++; } + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) { + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W)); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) { instr->src[src_number].src = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W), From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: sysvals and UBOs Message-ID: <20150417155246.BA6A57626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 70b2f872ea6ae651b832a2b3dd975efd78289fad URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=70b2f872ea6ae651b832a2b3dd975efd78289fad Author: Rob Clark Date: Sat Apr 11 12:15:17 2015 -0400 freedreno/a4xx: sysvals and UBOs Basically just sync up the cmdstream emit parts to match the changes already done on a3xx. Also, fix scheduling for mem instructions. This is needed on a4xx, and I am a bit surprised it isn't needed for a3xx. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 75 +++++++++++++++------- src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 +- 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index bf51847..c315a47 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -87,11 +87,12 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct ir3_shader_variant *shader) + struct ir3_shader_variant *shader, + bool emit_immediates) { uint32_t enabled_mask = constbuf->enabled_mask; - uint32_t first_immediate; - uint32_t base = 0; + uint32_t max_const; + int i; // XXX TODO only emit dirty consts.. but we need to keep track if // they are clobbered by a clear, gmem2mem, or mem2gmem.. @@ -102,42 +103,57 @@ emit_constants(struct fd_ringbuffer *ring, * than first_immediate. In that case truncate the user consts * early to avoid HLSQ lockup caused by writing too many consts */ - first_immediate = MIN2(shader->first_immediate, shader->constlen); + max_const = MIN2(shader->first_driver_param, shader->constlen); /* emit user constants: */ - while (enabled_mask) { - unsigned index = ffs(enabled_mask) - 1; + if (enabled_mask & 1) { + const unsigned index = 0; struct pipe_constant_buffer *cb = &constbuf->cb[index]; unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); - /* gallium could leave const buffers bound above what the - * current shader uses.. don't let that confuse us. 
+ /* and even if the start of the const buffer is before + * first_immediate, the end may not be: */ - if (base >= (4 * first_immediate)) - break; - - if (constbuf->dirty_mask & (1 << index)) { - /* and even if the start of the const buffer is before - * first_immediate, the end may not be: - */ - size = MIN2(size, (4 * first_immediate) - base); - fd4_emit_constant(ring, sb, base, + size = MIN2(size, 4 * max_const); + + if (size && (constbuf->dirty_mask & (1 << index))) { + fd4_emit_constant(ring, sb, 0, cb->buffer_offset, size, cb->user_buffer, cb->buffer); constbuf->dirty_mask &= ~(1 << index); } - base += size; enabled_mask &= ~(1 << index); } + /* emit ubos: */ + if (shader->constlen > shader->first_driver_param) { + uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(params)); + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + + for (i = 1; i <= params * 4; i++) { + struct pipe_constant_buffer *cb = &constbuf->cb[i]; + assert(!cb->user_buffer); + if ((enabled_mask & (1 << i)) && cb->buffer) + OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0); + else + OUT_RING(ring, 0xbad00000 | ((i - 1) << 16)); + } + } + /* emit shader immediates: */ - if (shader) { + if (shader && emit_immediates) { int size = shader->immediates_count; - base = shader->first_immediate; + uint32_t base = shader->first_immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -499,11 +515,26 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], - (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? 
vp : NULL); + vp, emit->prog->dirty & FD_SHADER_DIRTY_VP); if (!emit->key.binning_pass) { emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], - (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL); + fp, emit->prog->dirty & FD_SHADER_DIRTY_FP); + } + } + + /* emit driver params every time */ + if (emit->info && emit->prog == &ctx->prog) { + uint32_t vertex_params[4] = { + emit->info->indexed ? emit->info->index_bias : emit->info->start, + 0, + 0, + 0 + }; + if (vp->constlen >= vp->first_driver_param + 4) { + fd4_emit_constant(ring, SB_VERT_SHADER, + (vp->first_driver_param + 4) * 4, + 0, 4, vertex_params, NULL); } } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1b89387..fda60ed 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -363,7 +363,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, */ return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return is_a3xx(screen) ? 16 : 1; + return (is_a3xx(screen) || is_a4xx(screen)) ? 
16 : 1; case PIPE_SHADER_CAP_MAX_PREDS: return 0; /* nothing uses this */ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 9e1f45d..b899c66 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -71,7 +71,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, return 0; /* assigner must be alu: */ - if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { + if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || + is_mem(consumer)) { return 6; } else if ((consumer->category == 3) && (is_mad(consumer->opc) || is_madsh(consumer->opc)) && From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno/ir3: move out helper Message-ID: <20150417155246.C5E917626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 87807e5cc50f404a8e3ec8864bf8b7427ab6d687 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=87807e5cc50f404a8e3ec8864bf8b7427ab6d687 Author: Rob Clark Date: Sun Apr 12 09:46:34 2015 -0400 freedreno/ir3: move out helper We'll also want it in NIR f/e for implementing UBO support. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 23 +++++++++++++++++++++++ src/gallium/drivers/freedreno/ir3/ir3_cp.c | 24 ------------------------ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 85daf10..1a3deb4 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -458,6 +458,29 @@ static inline bool is_nop(struct ir3_instruction *instr) return is_flow(instr) && (instr->opc == OPC_NOP); } +/* Is it a non-transformative (ie. not type changing) mov? 
This can + * also include absneg.s/absneg.f, which for the most part can be + * treated as a mov (single src argument). + */ +static inline bool is_same_type_mov(struct ir3_instruction *instr) +{ + struct ir3_register *dst = instr->regs[0]; + + /* mov's that write to a0.x or p0.x are special: */ + if (dst->num == regid(REG_P0, 0)) + return false; + if (dst->num == regid(REG_A0, 0)) + return false; + + if ((instr->category == 1) && + (instr->cat1.src_type == instr->cat1.dst_type)) + return true; + if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || + (instr->opc == OPC_ABSNEG_S))) + return true; + return false; +} + static inline bool is_alu(struct ir3_instruction *instr) { return (1 <= instr->category) && (instr->category <= 3); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 313a423..fa7d363 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -34,30 +34,6 @@ * Copy Propagate: */ - -/* Is it a non-transformative (ie. not type changing) mov? This can - * also include absneg.s/absneg.f, which for the most part can be - * treated as a mov (single src argument). - */ -static bool is_same_type_mov(struct ir3_instruction *instr) -{ - struct ir3_register *dst = instr->regs[0]; - - /* mov's that write to a0.x or p0.x are special: */ - if (dst->num == regid(REG_P0, 0)) - return false; - if (dst->num == regid(REG_A0, 0)) - return false; - - if ((instr->category == 1) && - (instr->cat1.src_type == instr->cat1.dst_type)) - return true; - if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || - (instr->opc == OPC_ABSNEG_S))) - return true; - return false; -} - /* is it a type preserving mov, with ok flags? 
*/ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) { From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno: update generated headers Message-ID: <20150417155246.DBA9B7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 20ea698c492ea04f52e50974ecf39e887d144e43 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=20ea698c492ea04f52e50974ecf39e887d144e43 Author: Rob Clark Date: Sun Apr 12 14:25:29 2015 -0400 freedreno: update generated headers Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 6 +- src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 139 ++++++++++++- src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 214 +++++++++++++++++++-- src/gallium/drivers/freedreno/a4xx/fd4_blend.c | 2 +- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 4 +- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 2 +- src/gallium/drivers/freedreno/adreno_common.xml.h | 6 +- src/gallium/drivers/freedreno/adreno_pm4.xml.h | 23 +-- 8 files changed, 354 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index 3811bc5..a315f5c 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index 8d15ed4..0cccff1 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) @@ -216,14 +216,24 @@ enum 
a3xx_color_fmt { RB_R10G10B10A2_UNORM = 16, RB_A8_UNORM = 20, RB_R8_UNORM = 21, + RB_R16_FLOAT = 24, + RB_R16G16_FLOAT = 25, RB_R16G16B16A16_FLOAT = 27, RB_R11G11B10_FLOAT = 28, + RB_R16_SNORM = 32, + RB_R16G16_SNORM = 33, + RB_R16G16B16A16_SNORM = 35, + RB_R16_UNORM = 36, + RB_R16G16_UNORM = 37, + RB_R16G16B16A16_UNORM = 39, RB_R16_SINT = 40, RB_R16G16_SINT = 41, RB_R16G16B16A16_SINT = 43, RB_R16_UINT = 44, RB_R16G16_UINT = 45, RB_R16G16B16A16_UINT = 47, + RB_R32_FLOAT = 48, + RB_R32G32_FLOAT = 49, RB_R32G32B32A32_FLOAT = 51, RB_R32_SINT = 52, RB_R32G32_SINT = 53, @@ -272,6 +282,12 @@ enum a3xx_intp_mode { FLAT = 1, }; +enum a3xx_repl_mode { + S = 1, + T = 2, + ONE_T = 3, +}; + enum a3xx_tex_filter { A3XX_TEX_NEAREST = 0, A3XX_TEX_LINEAR = 1, @@ -758,7 +774,7 @@ static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) #define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) { - return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; + return ((((int32_t)(val * 64.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; } #define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 @@ -1259,9 +1275,21 @@ static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op v #define REG_A3XX_RB_STENCIL_CLEAR 0x00002105 -#define REG_A3XX_RB_STENCIL_BUF_INFO 0x00002106 +#define REG_A3XX_RB_STENCIL_INFO 0x00002106 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800 +#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val) +{ + return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK; +} -#define REG_A3XX_RB_STENCIL_BUF_PITCH 0x00002107 +#define REG_A3XX_RB_STENCIL_PITCH 0x00002107 +#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff +#define A3XX_RB_STENCIL_PITCH__SHIFT 0 +static inline 
uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val) +{ + return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK; +} #define REG_A3XX_RB_STENCILREFMASK 0x00002108 #define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff @@ -1369,6 +1397,7 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_ { return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; } +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000 #define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 #define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 @@ -1818,6 +1847,102 @@ static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val) static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; } static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; } +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c +#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0 +#define 
A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & 
A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK; +} +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000 +#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30 +static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val) +{ + return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK; +} #define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a @@ -2680,7 +2805,7 @@ static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) } 
#define REG_A3XX_TEX_CONST_3 0x00000003 -#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00001fff +#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00007fff #define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0 static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val) { diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 0e80564..0f69205 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) @@ -43,10 +43,40 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
enum a4xx_color_fmt { RB4_A8_UNORM = 1, + RB4_R8_UNORM = 2, + RB4_R4G4B4A4_UNORM = 8, + RB4_R5G5B5A1_UNORM = 10, RB4_R5G6R5_UNORM = 14, - RB4_Z16_UNORM = 15, + RB4_R8G8_UNORM = 15, + RB4_R8G8_SNORM = 16, + RB4_R8G8_UINT = 17, + RB4_R8G8_SINT = 18, + RB4_R16_FLOAT = 21, + RB4_R16_UINT = 22, + RB4_R16_SINT = 23, RB4_R8G8B8_UNORM = 25, RB4_R8G8B8A8_UNORM = 26, + RB4_R8G8B8A8_SNORM = 28, + RB4_R8G8B8A8_UINT = 29, + RB4_R8G8B8A8_SINT = 30, + RB4_R10G10B10A2_UNORM = 31, + RB4_R10G10B10A2_UINT = 34, + RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_FLOAT = 42, + RB4_R16G16_UINT = 43, + RB4_R16G16_SINT = 44, + RB4_R32_FLOAT = 45, + RB4_R32_UINT = 46, + RB4_R32_SINT = 47, + RB4_R16G16B16A16_FLOAT = 54, + RB4_R16G16B16A16_UINT = 55, + RB4_R16G16B16A16_SINT = 56, + RB4_R32G32_FLOAT = 57, + RB4_R32G32_UINT = 58, + RB4_R32G32_SINT = 59, + RB4_R32G32B32A32_FLOAT = 60, + RB4_R32G32B32A32_UINT = 61, + RB4_R32G32B32A32_SINT = 62, }; enum a4xx_tile_mode { @@ -91,7 +121,14 @@ enum a4xx_vtx_fmt { VFMT4_16_16_UNORM = 29, VFMT4_16_16_16_UNORM = 30, VFMT4_16_16_16_16_UNORM = 31, + VFMT4_32_UINT = 32, + VFMT4_32_32_UINT = 33, + VFMT4_32_32_32_UINT = 34, + VFMT4_32_32_32_32_UINT = 35, + VFMT4_32_SINT = 36, VFMT4_32_32_SINT = 37, + VFMT4_32_32_32_SINT = 38, + VFMT4_32_32_32_32_SINT = 39, VFMT4_8_UINT = 40, VFMT4_8_8_UINT = 41, VFMT4_8_8_8_UINT = 42, @@ -125,12 +162,57 @@ enum a4xx_tex_fmt { TFMT4_8_UNORM = 4, TFMT4_8_8_UNORM = 14, TFMT4_8_8_8_8_UNORM = 28, + TFMT4_8_8_SNORM = 15, + TFMT4_8_8_8_8_SNORM = 29, + TFMT4_8_8_UINT = 16, + TFMT4_8_8_8_8_UINT = 30, + TFMT4_8_8_SINT = 17, + TFMT4_8_8_8_8_SINT = 31, + TFMT4_16_UINT = 21, + TFMT4_16_16_UINT = 41, + TFMT4_16_16_16_16_UINT = 54, + TFMT4_16_SINT = 22, + TFMT4_16_16_SINT = 42, + TFMT4_16_16_16_16_SINT = 55, + TFMT4_32_UINT = 44, + TFMT4_32_32_UINT = 57, + TFMT4_32_32_32_32_UINT = 64, + TFMT4_32_SINT = 45, + TFMT4_32_32_SINT = 58, + TFMT4_32_32_32_32_SINT = 65, TFMT4_16_FLOAT = 20, TFMT4_16_16_FLOAT = 40, TFMT4_16_16_16_16_FLOAT = 53, TFMT4_32_FLOAT 
= 43, TFMT4_32_32_FLOAT = 56, TFMT4_32_32_32_32_FLOAT = 63, + TFMT4_9_9_9_E5_FLOAT = 32, + TFMT4_11_11_10_FLOAT = 37, + TFMT4_ATC_RGB = 100, + TFMT4_ATC_RGBA_EXPLICIT = 101, + TFMT4_ATC_RGBA_INTERPOLATED = 102, + TFMT4_ETC2_RG11_UNORM = 103, + TFMT4_ETC2_RG11_SNORM = 104, + TFMT4_ETC2_R11_UNORM = 105, + TFMT4_ETC2_R11_SNORM = 106, + TFMT4_ETC1 = 107, + TFMT4_ETC2_RGB8 = 108, + TFMT4_ETC2_RGBA8 = 109, + TFMT4_ETC2_RGB8A1 = 110, + TFMT4_ASTC_4x4 = 111, + TFMT4_ASTC_5x4 = 112, + TFMT4_ASTC_5x5 = 113, + TFMT4_ASTC_6x5 = 114, + TFMT4_ASTC_6x6 = 115, + TFMT4_ASTC_8x5 = 116, + TFMT4_ASTC_8x6 = 117, + TFMT4_ASTC_8x8 = 118, + TFMT4_ASTC_10x5 = 119, + TFMT4_ASTC_10x6 = 120, + TFMT4_ASTC_10x8 = 121, + TFMT4_ASTC_10x10 = 122, + TFMT4_ASTC_12x10 = 123, + TFMT4_ASTC_12x12 = 124, }; enum a4xx_tex_fetchsize { @@ -288,13 +370,16 @@ static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val) #define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002 #define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004 #define A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010 #define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020 +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040 #define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380 #define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7 static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val) { return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK; } +#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800 #define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000 static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; } @@ -337,6 +422,7 @@ static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) { return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; } +#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000 #define 
A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0x007fc000 #define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14 static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) @@ -464,7 +550,12 @@ static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare } #define REG_A4XX_RB_FS_OUTPUT 0x000020f9 -#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND 0x00000001 +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff +#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0 +static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; +} #define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 @@ -473,12 +564,54 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK; } -#define REG_A4XX_RB_RENDER_CONTROL3 0x000020fb -#define A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE__MASK 0x0000001f -#define A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE__SHIFT 0 -static inline uint32_t A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(uint32_t val) +#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb +#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f +#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0 +#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00 +#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8 +static inline uint32_t 
A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000 +#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000 +#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000 +#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000 +#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val) { - return ((val) << A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE__SHIFT) & A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE__MASK; + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK; +} +#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000 +#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28 +static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val) +{ + return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK; } #define REG_A4XX_RB_COPY_CONTROL 0x000020fc @@ -562,7 +695,12 @@ static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val) } #define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100 -#define A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE 0x00000001 +#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline 
uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK; +} #define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020 #define REG_A4XX_RB_DEPTH_CONTROL 0x00002101 @@ -1029,6 +1167,9 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 #define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000 #define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1 +#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080 +#define A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100 +#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400 #define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4 #define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 @@ -1248,6 +1389,12 @@ static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef #define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0 +#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f +#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK; +} #define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080 #define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00 #define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8 @@ -1255,6 +1402,12 @@ static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val) { return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK; } +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000 +#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24 +static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK; +} static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; } @@ -1315,6 +1468,12 @@ static inline uint32_t 
A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; } +#define REG_A4XX_SP_GS_OBJ_START 0x0000235c + +#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d + +#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e + #define REG_A4XX_SP_GS_LENGTH_REG 0x00002360 #define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60 @@ -1699,6 +1858,14 @@ static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; } +#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076 +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff +#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0 +static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val) +{ + return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK; +} + #define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077 #define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003 #define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0 @@ -1905,6 +2072,18 @@ static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val) { return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK; } +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK; +} +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000 +#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18 +static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK; +} #define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3 
#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff @@ -2072,6 +2251,18 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) #define REG_A4XX_PC_RESTART_INDEX 0x000021c6 #define REG_A4XX_PC_GS_PARAM 0x000021e5 +#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff +#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0 +static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; +} +#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_GS_PARAM_PRIMTYPE__MASK; +} #define REG_A4XX_PC_HS_PARAM 0x000021e7 @@ -2219,6 +2410,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val) #define REG_A4XX_TEX_CONST_0 0x00000000 #define A4XX_TEX_CONST_0_TILED 0x00000001 +#define A4XX_TEX_CONST_0_SRGB 0x00000004 #define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 #define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4 static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c index b2d4949..396caa5 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -115,7 +115,7 @@ fd4_blend_state_create(struct pipe_context *pctx, A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE | A4XX_RB_MRT_CONTROL_BLEND | A4XX_RB_MRT_CONTROL_BLEND2; - so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND; + so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1); } if (reads_dest) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index c315a47..bae55dc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -742,8 +742,8 @@ 
fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff)); - OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1); - OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf)); + OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf)); OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 015f6c8..7453219 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -392,7 +392,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_RB_RENDER_CONTROL2_YCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); - OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE | + OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) | COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z)); OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index 163ac54..174b495 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index 05afc66..a3d5fff 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -12,11 +12,11 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15085 bytes, from 2014-12-20 21:49:41) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 64771 bytes, from 2015-03-15 21:55:57) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 51942 bytes, from 2015-02-24 17:14:02) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) -Copyright (C) 2013-2014 by the following authors: +Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) Permission is hereby granted, free of charge, to any person obtaining @@ 
-76,16 +76,10 @@ enum pc_di_primtype { DI_PT_LINELOOP = 7, DI_PT_RECTLIST = 8, DI_PT_POINTLIST_A3XX = 9, - DI_PT_QUADLIST = 13, - DI_PT_QUADSTRIP = 14, - DI_PT_POLYGON = 15, - DI_PT_2D_COPY_RECT_LIST_V0 = 16, - DI_PT_2D_COPY_RECT_LIST_V1 = 17, - DI_PT_2D_COPY_RECT_LIST_V2 = 18, - DI_PT_2D_COPY_RECT_LIST_V3 = 19, - DI_PT_2D_FILL_RECT_LIST = 20, - DI_PT_2D_LINE_STRIP = 21, - DI_PT_2D_TRI_STRIP = 22, + DI_PT_LINE_ADJ = 10, + DI_PT_LINESTRIP_ADJ = 11, + DI_PT_TRI_ADJ = 12, + DI_PT_TRISTRIP_ADJ = 13, }; enum pc_di_src_sel { @@ -192,6 +186,7 @@ enum adreno_state_block { SB_FRAG_TEX = 2, SB_FRAG_MIPADDR = 3, SB_VERT_SHADER = 4, + SB_GEOM_SHADER = 5, SB_FRAG_SHADER = 6, }; From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: UBO support Message-ID: <20150417155246.CE6AB7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 57f0d3b3c6ae3b9f79a03517410b8dbfab0382c6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57f0d3b3c6ae3b9f79a03517410b8dbfab0382c6 Author: Rob Clark Date: Sun Apr 12 09:47:40 2015 -0400 freedreno/ir3/nir: UBO support Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 1 + .../drivers/freedreno/ir3/ir3_compiler_nir.c | 51 ++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 1a3deb4..c0a14a0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -961,6 +961,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, /* cat6 instructions: */ INSTR2(6, LDLV) +INSTR2(6, LDG) /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. 
*/ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index f7ea879..13ae7c2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -975,6 +975,53 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu) } } +/* handles direct/indirect UBO reads: */ +static void +emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + struct ir3_block *b = ctx->block; + struct ir3_instruction *addr, *src0, *src1; + /* UBO addresses are the first driver params: */ + unsigned ubo = regid(ctx->so->first_driver_param, 0); + unsigned off = intr->const_index[0]; + + /* First src is ubo index, which could either be an immed or not: */ + src0 = get_src(ctx, &intr->src[0])[0]; + if (is_same_type_mov(src0) && + (src0->regs[1]->flags & IR3_REG_IMMED)) { + addr = create_uniform(ctx, ubo + src0->regs[1]->iim_val); + } else { + addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0)); + } + + if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) { + /* For load_ubo_indirect, second src is indirect offset: */ + src1 = get_src(ctx, &intr->src[1])[0]; + + /* and add offset to addr: */ + addr = ir3_ADD_S(b, addr, 0, src1, 0); + } + + /* if offset is to large to encode in the ldg, split it out: */ + if ((off + (intr->num_components * 4)) > 1024) { + /* split out the minimal amount to improve the odds that + * cp can fit the immediate in the add.s instruction: + */ + unsigned off2 = off + (intr->num_components * 4) - 1024; + addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0); + off -= off2; + } + + for (int i = 0; i < intr->num_components; i++) { + struct ir3_instruction *load = + ir3_LDG(b, addr, 0, create_immed(b, 1), 0); + load->cat6.type = TYPE_U32; + load->cat6.offset = off + i * 4; /* byte offset */ + dst[i] = load; + } +} + /* handles array reads: */ static void 
emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr, @@ -1124,6 +1171,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) get_addr(ctx, src[0])); } break; + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ubo_indirect: + emit_intrinsic_load_ubo(ctx, intr, dst); + break; case nir_intrinsic_load_input: compile_assert(ctx, intr->const_index[1] == 1); for (int i = 0; i < intr->num_components; i++) { From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: support for large shaders Message-ID: <20150417155246.E4B727626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e5e11b5baf26e175f802c8078db92fd8492aa29d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5e11b5baf26e175f802c8078db92fd8492aa29d Author: Rob Clark Date: Sun Apr 12 12:58:52 2015 -0400 freedreno/a4xx: support for large shaders Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 29 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 7453219..9c4a7d9 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -184,7 +184,24 @@ setup_stages(struct fd4_emit *emit, struct stage *s) * space and FS taking entire remaining space. We probably don't * need to do that the same way, but for now mimic what the blob * does to make it easier to diff against register values from blob + * + * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders + * is run from external memory. 
*/ + if ((s[VS].instrlen + s[FS].instrlen) > 64) { + /* prioritize FS for internal memory: */ + if (s[FS].instrlen < 64) { + /* if FS can fit, kick VS out to external memory: */ + s[VS].instrlen = 0; + } else if (s[VS].instrlen < 64) { + /* otherwise if VS can fit, kick out FS: */ + s[FS].instrlen = 0; + } else { + /* neither can fit, run both from external memory: */ + s[VS].instrlen = 0; + s[FS].instrlen = 0; + } + } s[VS].constlen = 66; s[FS].constlen = 128 - s[VS].constlen; s[VS].instroff = 0; @@ -279,7 +296,11 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); - OUT_RING(ring, 0x1c3); /* XXX SP_INSTR_CACHE_CTRL */ + OUT_RING(ring, 0x7f | /* XXX */ + COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) | + COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) | + COND(s[VS].instrlen && s[FS].instrlen, + A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER)); OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */ @@ -486,10 +507,12 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ } - emit_shader(ring, s[VS].v); + if (s[VS].instrlen) + emit_shader(ring, s[VS].v); if (!emit->key.binning_pass) - emit_shader(ring, s[FS].v); + if (s[FS].instrlen) + emit_shader(ring, s[FS].v); } /* hack.. until we figure out how to deal w/ vpsrepl properly.. 
*/ From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): gallium/ttn: add support for TXL2 Message-ID: <20150417155246.8C9707626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a3cce7a38eb57b6845ee47c2aeeae7778c91be76 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3cce7a38eb57b6845ee47c2aeeae7778c91be76 Author: Rob Clark Date: Sat Apr 11 11:37:12 2015 -0400 gallium/ttn: add support for TXL2 Signed-off-by: Rob Clark Reviewed-by: Eric Anholt --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 7312e54..42671c8 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1001,6 +1001,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) op = nir_texop_txl; num_srcs = 2; break; + case TGSI_OPCODE_TXL2: + op = nir_texop_txl; + num_srcs = 2; + samp = 2; + break; case TGSI_OPCODE_TXF: op = nir_texop_txf; num_srcs = 1; @@ -1082,6 +1087,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src) src_number++; } + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { + instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X)); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) { instr->src[src_number].src = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W), From robclark at kemper.freedesktop.org Fri Apr 17 15:52:46 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:46 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: lower if/else Message-ID: <20150417155246.EDF9F7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: efbf14e8936384ab1d243afbe3fa9bb0f40e3898 
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=efbf14e8936384ab1d243afbe3fa9bb0f40e3898 Author: Rob Clark Date: Wed Apr 15 10:16:31 2015 -0400 freedreno/ir3/nir: lower if/else For now, completely flatten if/else blocks. That will almost certainly change once we have flow control. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/Makefile.sources | 2 + src/gallium/drivers/freedreno/freedreno_screen.c | 4 - src/gallium/drivers/freedreno/ir3/ir3_compiler.h | 6 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 3 +- src/gallium/drivers/freedreno/ir3/ir3_nir.h | 36 +++ .../drivers/freedreno/ir3/ir3_nir_lower_if_else.c | 338 ++++++++++++++++++++ 6 files changed, 381 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 3224efc..a565a9c 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -130,6 +130,8 @@ ir3_SOURCES := \ ir3/ir3_group.c \ ir3/ir3.h \ ir3/ir3_legalize.c \ + ir3/ir3_nir.h \ + ir3/ir3_nir_lower_if_else.c \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ ir3/ir3_shader.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index fda60ed..15ae287 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -346,10 +346,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: return 16384; case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - /* for now, let someone else flatten if/else when using NIR: */ - if ((fd_mesa_debug & FD_DBG_NIR) && - (is_a3xx(screen) || is_a4xx(screen))) - return 0; return 8; /* XXX */ case PIPE_SHADER_CAP_MAX_INPUTS: case PIPE_SHADER_CAP_MAX_OUTPUTS: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index ed9637b..9213386 100644 --- 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -26,8 +26,8 @@ * Rob Clark */ -#ifndef FD3_COMPILER_H_ -#define FD3_COMPILER_H_ +#ifndef IR3_COMPILER_H_ +#define IR3_COMPILER_H_ #include "ir3_shader.h" @@ -39,4 +39,4 @@ int ir3_compile_shader(struct ir3_shader_variant *so, const struct tgsi_token *tokens, struct ir3_shader_key key, bool cp); -#endif /* FD3_COMPILER_H_ */ +#endif /* IR3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 13ae7c2..0b42cd7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -42,6 +42,7 @@ #include "ir3_compiler.h" #include "ir3_shader.h" +#include "ir3_nir.h" #include "instr-a3xx.h" #include "ir3.h" @@ -149,7 +150,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens) progress |= nir_copy_prop(s); progress |= nir_opt_dce(s); progress |= nir_opt_cse(s); - progress |= nir_opt_peephole_select(s); + progress |= ir3_nir_lower_if_else(s); progress |= nir_opt_algebraic(s); progress |= nir_opt_constant_folding(s); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h new file mode 100644 index 0000000..f3d3075 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef IR3_NIR_H_ +#define IR3_NIR_H_ + +#include "glsl/nir/nir.h" + +bool ir3_nir_lower_if_else(nir_shader *shader); + +#endif /* IR3_NIR_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c new file mode 100644 index 0000000..ae36019 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -0,0 +1,338 @@ +/* + * Copyright © 2014 Intel Corporation + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason at jlekstrand.net) + * Rob Clark (robclark at freedesktop.org) + * + */ + +#include "ir3_nir.h" +#include "glsl/nir/nir_builder.h" + +/* Based on nir_opt_peephole_select, and hacked up to more aggressively + * flatten anything that can be flattened + * + * This *might* be something that other drivers could use. On the other + * hand, I think most other hw has predicated instructions or similar + * to select which side of if/else writes back result (and therefore + * not having to assign unique registers to both sides of the if/else. + * (And hopefully those drivers don't also have crazy scheduling reqs + * and can more easily do this in their backend.) + * + * TODO eventually when we have proper flow control in the backend: + * + * + Probably weight differently normal ALUs vs SFUs (cos/rcp/exp) + * since executing extra SFUs for the branch-not-taken path will + * generally be much more expensive. + * + * Possibly what constitutes an ALU vs SFU differs between hw + * backends.. but that seems doubtful. + * + * + Account for texture fetch and memory accesses (incl UBOs) + * since these will be more expensive.. + * + * + When if-condition is const (or uniform) or we have some way + * to know that all threads in the warp take the same branch + * then we should prefer to not flatten the if/else.. 
+ */ + +struct lower_state { + nir_builder b; + void *mem_ctx; + bool progress; +}; + +static bool +valid_dest(nir_block *block, nir_dest *dest) +{ + /* It must be SSA */ + if (!dest->is_ssa) + return false; + + /* We only lower blocks that do not contain other blocks + * (so this is run iteratively in a loop). Therefore if + * we get this far, it should not have any if_uses: + */ + assert(dest->ssa.if_uses->entries == 0); + + /* The only uses of this definition must be phi's in the + * successor or in the current block + */ + struct set_entry *entry; + set_foreach(dest->ssa.uses, entry) { + const nir_instr *dest_instr = entry->key; + if (dest_instr->block == block) + continue; + if ((dest_instr->type == nir_instr_type_phi) && + (dest_instr->block == block->successors[0])) + continue; + return false; + } + + return true; +} + +static bool +block_check_for_allowed_instrs(nir_block *block) +{ + nir_foreach_instr(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + const nir_intrinsic_info *info = + &nir_intrinsic_infos[intr->intrinsic]; + + switch (intr->intrinsic) { + case nir_intrinsic_discard_if: + /* to simplify things, we want discard_if src in ssa: */ + if (!intr->src[0].is_ssa) + return false; + /* fallthrough */ + case nir_intrinsic_discard: + /* discard/discard_if can be reordered, but only + * with some special care + */ + break; + case nir_intrinsic_store_output: + /* TODO technically, if both if and else store + * the same output, we can hoist that out to + * the end of the block w/ a phi.. 
+ * In practice, the tgsi shaders we already get + * do this for us, so I think we don't need to + */ + default: + if (!(info->flags & NIR_INTRINSIC_CAN_REORDER)) + return false; + } + + break; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (!valid_dest(block, &tex->dest)) + return false; + break; + } + case nir_instr_type_phi: { + nir_phi_instr *phi = nir_instr_as_phi(instr); + if (!valid_dest(block, &phi->dest)) + return false; + break; + } + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (!valid_dest(block, &alu->dest.dest)) + return false; + break; + } + + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + break; /* always ssa dest */ + + default: + return false; + } + } + + return true; +} + +/* flatten an then or else block: */ +static void +flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block, + nir_ssa_def *condition, bool invert) +{ + nir_foreach_instr_safe(if_block, instr) { + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if ((intr->intrinsic == nir_intrinsic_discard) || + (intr->intrinsic == nir_intrinsic_discard_if)) { + nir_ssa_def *discard_cond; + + nir_builder_insert_after_instr(bld, + nir_block_last_instr(prev_block)); + + if (invert) { + condition = nir_inot(bld, condition); + invert = false; + } + + if (intr->intrinsic == nir_intrinsic_discard) { + discard_cond = condition; + } else { + assert(intr->src[0].is_ssa); + /* discard_if gets re-written w/ src and'd: */ + discard_cond = nir_iand(bld, condition, intr->src[0].ssa); + } + + nir_intrinsic_instr *discard_if = + nir_intrinsic_instr_create(bld->shader, + nir_intrinsic_discard_if); + discard_if->src[0] = nir_src_for_ssa(discard_cond); + + nir_instr_insert_after(nir_block_last_instr(prev_block), + &discard_if->instr); + nir_instr_remove(instr); + instr = NULL; + } + } + /* if not an handled specially, just move to prev 
block: */ + if (instr) { + /* NOTE: exec_node_remove() is safe here (vs nir_instr_remove()) + * since we are re-adding the instruction back in to the prev + * block (so no dangling SSA uses) + */ + exec_node_remove(&instr->node); + instr->block = prev_block; + exec_list_push_tail(&prev_block->instr_list, &instr->node); + } + } +} + +static bool +lower_if_else_block(nir_block *block, void *void_state) +{ + struct lower_state *state = void_state; + + /* If the block is empty, then it certainly doesn't have any phi nodes, + * so we can skip it. This also ensures that we do an early skip on the + * end block of the function which isn't actually attached to the CFG. + */ + if (exec_list_is_empty(&block->instr_list)) + return true; + + if (nir_cf_node_is_first(&block->cf_node)) + return true; + + nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); + if (prev_node->type != nir_cf_node_if) + return true; + + nir_if *if_stmt = nir_cf_node_as_if(prev_node); + nir_cf_node *then_node = nir_if_first_then_node(if_stmt); + nir_cf_node *else_node = nir_if_first_else_node(if_stmt); + + /* We can only have one block in each side ... */ + if (nir_if_last_then_node(if_stmt) != then_node || + nir_if_last_else_node(if_stmt) != else_node) + return true; + + nir_block *then_block = nir_cf_node_as_block(then_node); + nir_block *else_block = nir_cf_node_as_block(else_node); + + /* ... and those blocks must only contain "allowed" instructions. */ + if (!block_check_for_allowed_instrs(then_block) || + !block_check_for_allowed_instrs(else_block)) + return true; + + /* condition should be ssa too, which simplifies flatten_block: */ + if (!if_stmt->condition.is_ssa) + return true; + + /* At this point, we know that the previous CFG node is an if-then + * statement containing only moves to phi nodes in this block. We can + * just remove that entire CF node and replace all of the phi nodes with + * selects.
+ */ + + nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node)); + assert(prev_block->cf_node.type == nir_cf_node_block); + + /* First, we move the remaining instructions from the blocks to the + * block before. There are a few things that need handling specially + * like discard/discard_if. + */ + flatten_block(&state->b, then_block, prev_block, + if_stmt->condition.ssa, false); + flatten_block(&state->b, else_block, prev_block, + if_stmt->condition.ssa, true); + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); + nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx); + /* Splat the condition to all channels */ + memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); + + assert(exec_list_length(&phi->srcs) == 2); + nir_foreach_phi_src(phi, src) { + assert(src->pred == then_block || src->pred == else_block); + assert(src->src.is_ssa); + + unsigned idx = src->pred == then_block ? 
1 : 2; + nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx); + } + + nir_ssa_dest_init(&sel->instr, &sel->dest.dest, + phi->dest.ssa.num_components, phi->dest.ssa.name); + sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&sel->dest.dest.ssa), + state->mem_ctx); + + nir_instr_insert_before(&phi->instr, &sel->instr); + nir_instr_remove(&phi->instr); + } + + nir_cf_node_remove(&if_stmt->cf_node); + state->progress = true; + + return true; +} + +static bool +lower_if_else_impl(nir_function_impl *impl) +{ + struct lower_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + nir_builder_init(&state.b, impl); + + nir_foreach_block(impl, lower_if_else_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +bool +ir3_nir_lower_if_else(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_overload(shader, overload) { + if (overload->impl) + progress |= lower_if_else_impl(overload->impl); + } + + return progress; +} From robclark at kemper.freedesktop.org Fri Apr 17 15:52:47 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Fri, 17 Apr 2015 08:52:47 -0700 (PDT) Subject: Mesa (master): freedreno/ir3/nir: few little fixes Message-ID: <20150417155247.03BC27626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 95e68adcd9f2589ae6d998328c72b84ffc49edc7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=95e68adcd9f2589ae6d998328c72b84ffc49edc7 Author: Rob Clark Date: Thu Apr 16 15:35:50 2015 -0400 freedreno/ir3/nir: few little fixes isaml needs to scale up coords based on LoD. Also fix bogus bary.f varying # when there are non-bary frag shader inputs. And use sub.s of a positive immediate rather than add.s of negative (since CP is better about figuring out that those can be collapsed into the cat2 instr). 
Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 49 +++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 0b42cd7..e5f6c2a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -566,13 +566,13 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp) * to subtract (integer) 8 and divide by 16 (right- * shift by 4) then convert to float: * - * add.s tmp, src, -8 + * sub.s tmp, src, 8 * shr.b tmp, tmp, 4 * mov.u32f32 dst, tmp * */ - instr = ir3_ADD_S(block, ctx->frag_coord[comp], 0, - create_immed(block, -8), 0); + instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0, + create_immed(block, 8), 0); instr = ir3_SHR_B(block, instr, 0, create_immed(block, 4), 0); instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); @@ -1381,6 +1381,29 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) } } + switch (tex->op) { + case nir_texop_tex: opc = OPC_SAM; break; + case nir_texop_txb: opc = OPC_SAMB; break; + case nir_texop_txl: opc = OPC_SAML; break; + case nir_texop_txd: opc = OPC_SAMGQ; break; + case nir_texop_txf: opc = OPC_ISAML; break; + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_query_levels: + compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); + return; + } + + tex_info(tex, &flags, &coords); + + /* scale up integer coords for TXF based on the LOD */ + if (opc == OPC_ISAML) { + assert(has_lod); + for (i = 0; i < coords; i++) + coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0); + } /* * lay out the first argument in the proper order: * - actual coordinates first @@ -1392,8 +1415,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) * bias/lod go into the second arg */ - tex_info(tex, &flags, &coords); - /* insert tex coords: */ for (i = 0; i < coords; i++) 
src0[nsrc0++] = coord[i]; @@ -1450,21 +1471,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) src1[nsrc1++] = lod; } - switch (tex->op) { - case nir_texop_tex: opc = OPC_SAM; break; - case nir_texop_txb: opc = OPC_SAMB; break; - case nir_texop_txl: opc = OPC_SAML; break; - case nir_texop_txd: opc = OPC_SAMGQ; break; - case nir_texop_txf: opc = OPC_ISAML; break; - case nir_texop_txf_ms: - case nir_texop_txs: - case nir_texop_lod: - case nir_texop_tg4: - case nir_texop_query_levels: - compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op); - return; - } - switch (tex->dest_type) { case nir_type_invalid: case nir_type_float: @@ -1694,7 +1700,8 @@ setup_input(struct ir3_compile *ctx, nir_variable *in) so->inputs[n].bary = true; - instr = create_frag_input(ctx, idx, use_ldlv); + instr = create_frag_input(ctx, + so->inputs[n].inloc + i - 8, use_ldlv); } } else { instr = create_input(ctx->block, NULL, idx); From jekstrand at kemper.freedesktop.org Fri Apr 17 18:08:12 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 17 Apr 2015 11:08:12 -0700 (PDT) Subject: Mesa (master): i965/fs: Use the source type when looking for UD negations in copy prop Message-ID: <20150417180812.D79797626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bb99a58e7710acd19463646c38cdddbd926e89c4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bb99a58e7710acd19463646c38cdddbd926e89c4 Author: Jason Ekstrand Date: Fri Apr 3 12:15:48 2015 -0700 i965/fs: Use the source type when looking for UD negations in copy prop There can be problems with floats and conditional modifiers when copy-propagating a negated UD source. The problem arises when a source modifier is applied to a UD value. In this case, a 33-bit representation is internally used. If you do the following: 1: mov foo:UD 7U 2: mov bar:UD -foo:UD 3: mov out:F bar:UD the out register will have the value (float)(uint32_t)-7 which is some very large floating-point number.
However, if we allow copy-propagation of the second mov, we get 1: mov foo:UD 7U 3: mov out:F -foo:UD and, since the negation is computed in 33 bits, we get a value of -7.0f which is clearly not the same. There is a similar problem if the instruction has a conditional modifier where the 33-bit value is used in the comparison and not the 32-bit version. Previously, we checked the source to be copied for the negate and then checked the source being propagated to for the type. This isn't quite what we want because we are really just looking for negated UD sources. A check later in the file ensures that both ends of the propagate have the right type so it works. However, if we relax the restriction that both ends of the propagation have the same type, it ends up causing us to bail early in cases we don't want. Signed-off-by: Jason Ekstrand Reviewed-by: Anuj Phogat Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 764741d..e8d092c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -307,7 +307,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) * instead. See also resolve_ud_negate() and comment in * fs_generator::generate_code. */ - if (inst->src[arg].type == BRW_REGISTER_TYPE_UD && + if (entry->src.type == BRW_REGISTER_TYPE_UD && entry->src.negate) return false; From jekstrand at kemper.freedesktop.org Fri Apr 17 18:08:12 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 17 Apr 2015 11:08:12 -0700 (PDT) Subject: Mesa (master): nir: Allow abs/neg in select peephole pass.
Message-ID: <20150417180812.EB2297626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4dacb212fdcc82def02b8c9233f94caa5a8a3000 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4dacb212fdcc82def02b8c9233f94caa5a8a3000 Author: Matt Turner Date: Thu Apr 2 10:21:16 2015 -0700 nir: Allow abs/neg in select peephole pass. total instructions in shared programs: 4314531 -> 4308949 (-0.13%) instructions in affected programs: 429085 -> 423503 (-1.30%) helped: 1680 HURT: 0 GAINED: 0 LOST: 111 Reviewed-by: Jason Ekstrand --- src/glsl/nir/nir_opt_peephole_select.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c index b89451b..f400cfd 100644 --- a/src/glsl/nir/nir_opt_peephole_select.c +++ b/src/glsl/nir/nir_opt_peephole_select.c @@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block) case nir_instr_type_alu: { /* It must be a move operation */ nir_alu_instr *mov = nir_instr_as_alu(instr); - if (mov->op != nir_op_fmov && mov->op != nir_op_imov) + if (mov->op != nir_op_fmov && mov->op != nir_op_imov && + mov->op != nir_op_fneg && mov->op != nir_op_ineg && + mov->op != nir_op_fabs && mov->op != nir_op_iabs) return false; /* Can't handle saturate */ From jekstrand at kemper.freedesktop.org Fri Apr 17 18:08:12 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Fri, 17 Apr 2015 11:08:12 -0700 (PDT) Subject: Mesa (master): i965/fs: Change SEL and MOV types as needed to propagate source modifiers Message-ID: <20150417180812.E0D387626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 472ef9a02f2e5c5d0caa2809cb736a0f4f0d4693 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=472ef9a02f2e5c5d0caa2809cb736a0f4f0d4693 Author: Jason Ekstrand Date: Fri Apr 3 11:07:47 2015 -0700 i965/fs: Change SEL and MOV types as needed to propagate source modifiers SEL and MOV instructions, as long as they don't have source modifiers, are just 
copying bits around. This commit adds support to copy propagation to switch the type of a SEL or MOV instruction as needed so that it can propagate source modifiers. This is needed because NIR generates integer SEL and MOV instructions whenever it doesn't know what else to generate. shader-db results with NIR: total FS instructions in shared programs: 4360910 -> 4360186 (-0.02%) FS instructions in affected programs: 59094 -> 58370 (-1.23%) helped: 341 HURT: 0 GAINED: 2 LOST: 0 Signed-off-by: Jason Ekstrand Reviewed-by: Anuj Phogat Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 34 +++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index e8d092c..6a8e7bf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -275,6 +275,16 @@ is_logic_op(enum opcode opcode) opcode == BRW_OPCODE_NOT); } +static bool +can_change_source_types(fs_inst *inst) +{ + return !inst->src[0].abs && !inst->src[0].negate && + (inst->opcode == BRW_OPCODE_MOV || + (inst->opcode == BRW_OPCODE_SEL && + inst->predicate != BRW_PREDICATE_NONE && + !inst->src[1].abs && !inst->src[1].negate)); +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -346,7 +356,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0) return false; - if (has_source_modifiers && entry->dst.type != inst->src[arg].type) + if (has_source_modifiers && + entry->dst.type != inst->src[arg].type && + !can_change_source_types(inst)) return false; if (brw->gen >= 8 && (entry->src.negate || entry->src.abs) && @@ -412,9 +424,23 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) break; } - if (!inst->src[arg].abs) { - inst->src[arg].abs = entry->src.abs; -
inst->src[arg].negate ^= entry->src.negate; + if (has_source_modifiers) { + if (entry->dst.type != inst->src[arg].type) { + /* We are propagating source modifiers from a MOV with a different + * type. If we got here, then we can just change the source and + * destination types of the instruction and keep going. + */ + assert(can_change_source_types(inst)); + for (int i = 0; i < inst->sources; i++) { + inst->src[i].type = entry->dst.type; + } + inst->dst.type = entry->dst.type; + } + + if (!inst->src[arg].abs) { + inst->src[arg].abs = entry->src.abs; + inst->src[arg].negate ^= entry->src.negate; + } } return true; From aphogat at kemper.freedesktop.org Fri Apr 17 18:50:46 2015 From: aphogat at kemper.freedesktop.org (Anuj Phogat) Date: Fri, 17 Apr 2015 11:50:46 -0700 (PDT) Subject: Mesa (master): i965: Update the comment about platforms supporting blorp Message-ID: <20150417185046.192C67626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c6b0922c31983c76f1b2b9df66ae50b25b966bee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6b0922c31983c76f1b2b9df66ae50b25b966bee Author: Anuj Phogat Date: Mon Apr 13 10:20:29 2015 -0700 i965: Update the comment about platforms supporting blorp Signed-off-by: Anuj Phogat Reviewed-by: Chad Versace --- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index d25e201..1561b59 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -223,8 +223,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw, struct intel_mipmap_tree *src_mt = src_irb->mt; struct intel_mipmap_tree *dst_mt = intel_image->mt; - /* BLORP is not supported before Gen6. */ - if (brw->gen < 6 || brw->gen >= 8) + /* BLORP is only supported for Gen6-7. 
*/ + if (brw->gen < 6 || brw->gen > 7) return false; if (_mesa_get_format_base_format(src_rb->Format) != From aphogat at kemper.freedesktop.org Fri Apr 17 18:50:46 2015 From: aphogat at kemper.freedesktop.org (Anuj Phogat) Date: Fri, 17 Apr 2015 11:50:46 -0700 (PDT) Subject: Mesa (master): i965: Render R16G16B16X16 as R16G16B16A16 Message-ID: <20150417185046.215237626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 79010c9a531f45d2b6740ac26f4b04c169f5dc7a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79010c9a531f45d2b6740ac26f4b04c169f5dc7a Author: Anuj Phogat Date: Fri Apr 10 04:35:24 2015 -0700 i965: Render R16G16B16X16 as R16G16B16A16 This enables using _mesa_meta_pbo_TexSubImage() to upload data to R16G16B16X16 texture. Earlier it fell back to slower paths. Jenkins run shows no piglit regressions. Signed-off-by: Anuj Phogat Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index c7fb707..c5fde35 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -590,6 +590,12 @@ brw_init_surface_formats(struct brw_context *brw) case BRW_SURFACEFORMAT_L16_UNORM: render = BRW_SURFACEFORMAT_R16_UNORM; break; + case BRW_SURFACEFORMAT_R16G16B16X16_UNORM: + render = BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + break; + case BRW_SURFACEFORMAT_R16G16B16X16_FLOAT: + render = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT; + break; case BRW_SURFACEFORMAT_B8G8R8X8_UNORM: /* XRGB is handled as ARGB because the chips in this family * cannot render to XRGB targets. 
This means that we have to From mattst88 at kemper.freedesktop.org Fri Apr 17 19:05:34 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 17 Apr 2015 12:05:34 -0700 (PDT) Subject: Mesa (master): configure.ac: fix bashism Message-ID: <20150417190534.2638C7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 52e4e4712f0da7e7e1d1164d9487f2d38f80c441 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52e4e4712f0da7e7e1d1164d9487f2d38f80c441 Author: Tobias Nygren Date: Fri Apr 17 20:18:48 2015 +0200 configure.ac: fix bashism Reviewed-by: Matt Turner Signed-off-by: Tobias Nygren --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 6ccf3b4..325b936 100644 --- a/configure.ac +++ b/configure.ac @@ -1641,7 +1641,7 @@ if test "x$enable_nine" = xyes; then if ! echo "$with_gallium_drivers" | grep -q 'swrast'; then AC_MSG_ERROR([nine requires the gallium swrast driver]) fi - if test "x$with_gallium_drivers" == xswrast; then + if test "x$with_gallium_drivers" = xswrast; then AC_MSG_ERROR([nine requires at least one non-swrast gallium driver]) fi if test "x$enable_dri3" = xno; then From mattst88 at kemper.freedesktop.org Fri Apr 17 19:05:34 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 17 Apr 2015 12:05:34 -0700 (PDT) Subject: Mesa (master): adjust a couple of ifdefs to handle NetBSD correctly Message-ID: <20150417190534.2D8D47626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cfab4ea9c654819c96272a62d7b1664e9038fe91 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cfab4ea9c654819c96272a62d7b1664e9038fe91 Author: Tobias Nygren Date: Fri Apr 17 20:27:55 2015 +0200 adjust a couple of ifdefs to handle NetBSD correctly Acked-by: Matt Turner Signed-off-by: Tobias Nygren --- src/gallium/include/pipe/p_config.h | 2 +- src/mesa/drivers/dri/common/xmlconfig.c | 2 +- src/mesa/x86/common_x86.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) 
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 5b6db7d..794aabe 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -157,7 +157,7 @@ # define PIPE_ARCH_BIG_ENDIAN #endif -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) || defined(__NetBSD__) #include #include diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c index 2b284cc..f17693e 100644 --- a/src/mesa/drivers/dri/common/xmlconfig.c +++ b/src/mesa/drivers/dri/common/xmlconfig.c @@ -56,7 +56,7 @@ extern char *program_invocation_name, *program_invocation_short_name; # include # define GET_PROGRAM_NAME() getprogname() # endif -#elif defined(__NetBSD__) && defined(__NetBSD_Version) && (__NetBSD_Version >= 106000100) +#elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100) # include # define GET_PROGRAM_NAME() getprogname() #elif defined(__APPLE__) diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c index 86fbca9..1c86405 100644 --- a/src/mesa/x86/common_x86.c +++ b/src/mesa/x86/common_x86.c @@ -42,7 +42,7 @@ #include #include #endif -#if defined(USE_SSE_ASM) && defined(__OpenBSD__) +#if defined(USE_SSE_ASM) && (defined(__OpenBSD__) || defined(__NetBSD__)) #include #include #include From kwg at kemper.freedesktop.org Fri Apr 17 19:15:45 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 17 Apr 2015 12:15:45 -0700 (PDT) Subject: Mesa (master): i965: Make shader_time store names/ ids instead of referencing shaders. Message-ID: <20150417191545.828BA76270@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cd9058fae3eddaa0e88cd3557684700852f86a5a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd9058fae3eddaa0e88cd3557684700852f86a5a Author: Kenneth Graunke Date: Wed Apr 15 02:16:47 2015 -0700 i965: Make shader_time store names/ids instead of referencing shaders. 
Jason noticed that shader_time was bumping the reference count on the gl_shader_program and gl_program structures, in code called during compilation. Not only were these never unreferenced, but it meant fragment shaders might be referenced twice (SIMD8 and SIMD16)...or only once. We don't actually need the programs. We just need their numeric ID and their language (GLSL/ARB/FF) or KHR_debug label. If there's a label, we have to strdup it since the underlying program could be deleted. To be fair, we're not exactly cleaning that up either, but we at least ralloc it out of the shader_time arrays, so if we ever bother cleaning those up, they'll go away properly. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Acked-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_context.h | 4 +-- src/mesa/drivers/dri/i965/brw_program.c | 52 ++++++++++--------------------- 2 files changed, 19 insertions(+), 37 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0bd0ed1..a6d6787 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1454,8 +1454,8 @@ struct brw_context struct { drm_intel_bo *bo; - struct gl_shader_program **shader_programs; - struct gl_program **programs; + const char **names; + int *ids; enum shader_time_shader_type *types; uint64_t *cumulative; int num_entries; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 7ea08e6..81a0c19 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -294,10 +294,8 @@ brw_init_shader_time(struct brw_context *brw) brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time", max_entries * SHADER_TIME_STRIDE, 4096); - brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *, - max_entries); - brw->shader_time.programs = rzalloc_array(brw, struct gl_program *, - max_entries); + 
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries); + brw->shader_time.ids = rzalloc_array(brw, int, max_entries); brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, max_entries); brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, @@ -434,36 +432,15 @@ brw_report_shader_time(struct brw_context *brw) fprintf(stderr, "\n"); fprintf(stderr, "type ID cycles spent %% of total\n"); for (int s = 0; s < brw->shader_time.num_entries; s++) { - const char *shader_name; const char *stage; /* Work back from the sorted pointers times to a time to print. */ int i = sorted[s] - scaled; - struct gl_shader_program *prog = brw->shader_time.shader_programs[i]; if (scaled[i] == 0) continue; - int shader_num = 0; - if (prog) { - shader_num = prog->Name; - - if (prog->Label) { - shader_name = prog->Label; - } else if (shader_num == 0) { - shader_name = "ff"; - } else { - shader_name = "glsl"; - } - } else if (brw->shader_time.programs[i]) { - shader_num = brw->shader_time.programs[i]->Id; - if (shader_num == 0) { - shader_name = "ff"; - } else { - shader_name = "prog"; - } - } else { - shader_name = "other"; - } + int shader_num = brw->shader_time.ids[i]; + const char *shader_name = brw->shader_time.names[i]; switch (brw->shader_time.types[i]) { case ST_VS: @@ -543,19 +520,24 @@ brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog, enum shader_time_shader_type type) { - struct gl_context *ctx = &brw->ctx; - int shader_time_index = brw->shader_time.num_entries++; assert(shader_time_index < brw->shader_time.max_entries); brw->shader_time.types[shader_time_index] = type; - _mesa_reference_shader_program(ctx, - &brw->shader_time.shader_programs[shader_time_index], - shader_prog); + int id = shader_prog ? 
shader_prog->Name : prog->Id; + const char *name; + if (id == 0) { + name = "ff"; + } else if (!shader_prog) { + name = "prog"; + } else if (shader_prog->Label) { + name = ralloc_strdup(brw->shader_time.names, shader_prog->Label); + } else { + name = "glsl"; + } - _mesa_reference_program(ctx, - &brw->shader_time.programs[shader_time_index], - prog); + brw->shader_time.names[shader_time_index] = name; + brw->shader_time.ids[shader_time_index] = id; return shader_time_index; } From kwg at kemper.freedesktop.org Fri Apr 17 19:15:45 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 17 Apr 2015 12:15:45 -0700 (PDT) Subject: Mesa (master): i965: Delete some unnecessary code in brw_report_shader_time(). Message-ID: <20150417191545.77C297626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: eb6e770889536e44da0947b64955e9a923ba98f6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb6e770889536e44da0947b64955e9a923ba98f6 Author: Kenneth Graunke Date: Wed Apr 15 02:04:17 2015 -0700 i965: Delete some unnecessary code in brw_report_shader_time(). It is true that a gl_shader_program with ID 0 will be a fixed-function fragment program; a gl_program with ID 0 but NULL gl_shader_program means that it's a fixed-function vertex shader. But that's not terribly interesting or relevant to what we're doing. We just need to know that ID 0 means "fixed function". 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_program.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 0ca63de..7ea08e6 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -447,14 +447,9 @@ brw_report_shader_time(struct brw_context *brw) if (prog) { shader_num = prog->Name; - /* The fixed function fragment shader generates GLSL IR with a Name - * of 0, and nothing else does. - */ if (prog->Label) { shader_name = prog->Label; - } else if (shader_num == 0 && - (brw->shader_time.types[i] == ST_FS8 || - brw->shader_time.types[i] == ST_FS16)) { + } else if (shader_num == 0) { shader_name = "ff"; } else { shader_name = "glsl"; From kwg at kemper.freedesktop.org Fri Apr 17 19:15:45 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 17 Apr 2015 12:15:45 -0700 (PDT) Subject: Mesa (master): i965: Issue perf_debug messages for unsynchronized maps on !LLC systems. Message-ID: <20150417191545.8E75F76272@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1d6829813ef2b002fca488e6a8051e3090bf87bd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1d6829813ef2b002fca488e6a8051e3090bf87bd Author: Kenneth Graunke Date: Tue Feb 24 21:34:30 2015 -0800 i965: Issue perf_debug messages for unsynchronized maps on !LLC systems. We haven't implemented proper unsynchronized map support on !LLC systems (pre-SNB, Atom). MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT will actually do a synchronized map, probably killing performance. Also warn on BufferSubData, when we should be doing an unsynchronized upload, but instead have to do a synchronous map. v2: Only complain if the buffer is actually busy - we use unsynchronized maps internally for vertex upload and such, but expect those to not be busy.
Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky Tested-by: Ben Widawsky --- src/mesa/drivers/dri/i965/intel_buffer_objects.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index 3b0a206..627c487 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -254,9 +254,9 @@ brw_buffer_subdata(struct gl_context *ctx, * (otherwise, an app that might occasionally stall but mostly not will end * up with blitting all the time, at the cost of bandwidth) */ - if (brw->has_llc) { - if (offset + size <= intel_obj->gpu_active_start || - intel_obj->gpu_active_end <= offset) { + if (offset + size <= intel_obj->gpu_active_start || + intel_obj->gpu_active_end <= offset) { + if (brw->has_llc) { drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); memcpy(intel_obj->buffer->virtual + offset, data, size); drm_intel_bo_unmap(intel_obj->buffer); @@ -264,6 +264,8 @@ brw_buffer_subdata(struct gl_context *ctx, if (intel_obj->gpu_active_end > intel_obj->gpu_active_start) intel_obj->prefer_stall_to_blit = true; return; + } else { + perf_debug("BufferSubData could be unsynchronized, but !LLC doesn't support it yet\n"); } } @@ -437,9 +439,13 @@ brw_map_buffer_range(struct gl_context *ctx, return obj->Mappings[index].Pointer; } - if (access & GL_MAP_UNSYNCHRONIZED_BIT) + if (access & GL_MAP_UNSYNCHRONIZED_BIT) { + if (!brw->has_llc && brw->perf_debug && + drm_intel_bo_busy(intel_obj->buffer)) { + perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n"); + } drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); - else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) || + } else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) || (access & GL_MAP_PERSISTENT_BIT))) { drm_intel_gem_bo_map_gtt(intel_obj->buffer); 
mark_buffer_inactive(intel_obj); From kwg at kemper.freedesktop.org Fri Apr 17 19:15:45 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Fri, 17 Apr 2015 12:15:45 -0700 (PDT) Subject: Mesa (master): i965: Make shader_time use 0 instead of -1 for " no meaningful ID". Message-ID: <20150417191545.6F8777626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e9efd667defe13c89881f138cb5719c72573ff73 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9efd667defe13c89881f138cb5719c72573ff73 Author: Kenneth Graunke Date: Wed Apr 15 01:57:52 2015 -0700 i965: Make shader_time use 0 instead of -1 for "no meaningful ID". 0 is not a valid GLSL shader or ARB program ID. For some reason, shader_time used -1 instead...so we had code to detect 0, then override it to -1. We can just delete that. Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_program.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 9e27c2a..0ca63de 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -346,7 +346,7 @@ print_shader_time_line(const char *stage, const char *name, { fprintf(stderr, "%-6s%-18s", stage, name); - if (shader_num != -1) + if (shader_num != 0) fprintf(stderr, "%4d: ", shader_num); else fprintf(stderr, " : "); @@ -443,7 +443,7 @@ brw_report_shader_time(struct brw_context *brw) if (scaled[i] == 0) continue; - int shader_num = -1; + int shader_num = 0; if (prog) { shader_num = prog->Name; @@ -456,7 +456,6 @@ brw_report_shader_time(struct brw_context *brw) (brw->shader_time.types[i] == ST_FS8 || brw->shader_time.types[i] == ST_FS16)) { shader_name = "ff"; - shader_num = -1; } else { shader_name = "glsl"; } @@ -464,7 +463,6 @@ brw_report_shader_time(struct brw_context *brw) shader_num = brw->shader_time.programs[i]->Id; if 
(shader_num == 0) { shader_name = "ff"; - shader_num = -1; } else { shader_name = "prog"; } @@ -495,10 +493,10 @@ brw_report_shader_time(struct brw_context *brw) } fprintf(stderr, "\n"); - print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total); - print_shader_time_line("total", "gs", -1, total_by_type[ST_GS], total); - print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total); - print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total); + print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total); + print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total); + print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total); + print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total); } static void From cwabbott0 at kemper.freedesktop.org Fri Apr 17 19:24:37 2015 From: cwabbott0 at kemper.freedesktop.org (Connor Abbott) Date: Fri, 17 Apr 2015 12:24:37 -0700 (PDT) Subject: Mesa (master): mesa/main: add autogenerated format-info.c to gitignore Message-ID: <20150417192437.363A37626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 65f13352b901ba08c69d3e0a884f6029297f7fda URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=65f13352b901ba08c69d3e0a884f6029297f7fda Author: Connor Abbott Date: Fri Apr 17 12:50:30 2015 -0400 mesa/main: add autogenerated format-info.c to gitignore v2: move to right after format-info.h Signed-off-by: Connor Abbott Reviewed-by: Ilia Mirkin --- src/mesa/main/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/.gitignore b/src/mesa/main/.gitignore index 8256ad7..355b426 100644 --- a/src/mesa/main/.gitignore +++ b/src/mesa/main/.gitignore @@ -7,5 +7,6 @@ remap_helper.h get_hash.h get_hash.h.tmp format_info.h +format_info.c format_pack.c format_unpack.c From cwabbott0 at kemper.freedesktop.org Fri Apr 17 19:24:37 2015 From: cwabbott0 at kemper.freedesktop.org (Connor Abbott) Date: Fri, 17 Apr 2015 12:24:37 -0700 (PDT) 
Subject: Mesa (master): mesa: add .mesa-install-links files to gitignore Message-ID: <20150417192437.3E2F87626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1eac3ae1a6ebecf353054d937dd603a11ea33fb3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1eac3ae1a6ebecf353054d937dd603a11ea33fb3 Author: Connor Abbott Date: Fri Apr 17 12:53:53 2015 -0400 mesa: add .mesa-install-links files to gitignore Signed-off-by: Connor Abbott Reviewed-by: Ilia Mirkin --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 00e3ce3..21aa35c 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,4 @@ manifest.txt .libs/ Makefile Makefile.in +.install-mesa-links From imirkin at kemper.freedesktop.org Sat Apr 18 22:56:21 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 18 Apr 2015 15:56:21 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: fix integer and 32-bit float border colors Message-ID: <20150418225621.6FAC8761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 63576016284e15e881fcd8bd6d3939b5d8b53572 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=63576016284e15e881fcd8bd6d3939b5d8b53572 Author: Ilia Mirkin Date: Mon Apr 6 01:15:09 2015 -0400 freedreno/a3xx: fix integer and 32-bit float border colors Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 31 ++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index f961fc0..ee473e6 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -217,6 +217,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr + (BORDERCOLOR_SIZE * tex_off[sb]) + (BORDERCOLOR_SIZE * i)); + uint32_t *bcolor32 = (uint32_t *)&bcolor[16]; /* * XXX HACK ALERT XXX @@ -231,7 +232,35 @@ 
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, const struct util_format_description *desc = util_format_description(tex->textures[i]->format); for (j = 0; j < 4; j++) { - if (desc->swizzle[j] < 4) + if (desc->swizzle[j] >= 4) + continue; + + const struct util_format_channel_description *chan = + &desc->channel[desc->swizzle[j]]; + int size = chan->size; + + /* The Z16 texture format we use seems to look in the + * 32-bit border color slots + */ + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + size = 32; + + /* Formats like R11G11B10 or RGB9_E5 don't specify + * per-channel sizes properly. + */ + if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) + size = 16; + + if (chan->pure_integer && size > 16) + bcolor32[desc->swizzle[j] + 4] = + sampler->base.border_color.i[j]; + else if (size > 16) + bcolor32[desc->swizzle[j]] = + fui(sampler->base.border_color.f[j]); + else if (chan->pure_integer) + bcolor[desc->swizzle[j] + 8] = + sampler->base.border_color.i[j]; + else bcolor[desc->swizzle[j]] = util_float_to_half(sampler->base.border_color.f[j]); } From imirkin at kemper.freedesktop.org Sat Apr 18 22:56:21 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 18 Apr 2015 15:56:21 -0700 (PDT) Subject: Mesa (master): indices: fix provoking vertex for quads/quadstrips Message-ID: <20150418225621.905C0761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b2e871bd484db229978cfe4b7efa12dfd79067a8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2e871bd484db229978cfe4b7efa12dfd79067a8 Author: Ilia Mirkin Date: Tue Apr 7 01:42:05 2015 -0400 indices: fix provoking vertex for quads/quadstrips This allows drivers to provide consistent flat shading for quads. Otherwise a driver that only supported tris would have to force last provoking vertex when drawing quads (and would have to say that quads don't follow the provoking vertex convention). 
Signed-off-by: Ilia Mirkin Reviewed-by: Rob Clark --- src/gallium/auxiliary/indices/u_indices_gen.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py index 687a717..97c8e0d 100644 --- a/src/gallium/auxiliary/indices/u_indices_gen.py +++ b/src/gallium/auxiliary/indices/u_indices_gen.py @@ -142,8 +142,12 @@ def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ): tri( intype, outtype, ptr, v2, v0, v1 ) def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): - do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); - do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); + if inpv == LAST: + do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); + do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); + else: + do_tri( intype, outtype, ptr+'+0', v0, v1, v2, inpv, outpv ); + do_tri( intype, outtype, ptr+'+3', v0, v2, v3, inpv, outpv ); def name(intype, outtype, inpv, outpv, pr, prim): if intype == GENERATE: @@ -331,7 +335,10 @@ def quadstrip(intype, outtype, inpv, outpv, pr): print ' i += 4;' print ' goto restart;' print ' }' - do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); + if inpv == LAST: + do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); + else: + do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+3', 'i+2', inpv, outpv ); print ' }' postamble() From imirkin at kemper.freedesktop.org Sat Apr 18 22:56:21 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 18 Apr 2015 15:56:21 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add support for float R/RG render targets Message-ID: <20150418225621.64365761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6895c3554eadaf187642a513bb95fef1a7e07f57 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6895c3554eadaf187642a513bb95fef1a7e07f57 Author: Ilia Mirkin 
Date: Sun Apr 5 21:38:57 2015 -0400 freedreno/a3xx: add support for float R/RG render targets Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_format.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 03f8d04..939693d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -98,7 +98,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(R16_SINT, 16_SINT, R16_SINT, WZYX), V_(R16_USCALED, 16_UINT, NONE, WZYX), V_(R16_SSCALED, 16_UINT, NONE, WZYX), - VT(R16_FLOAT, 16_FLOAT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX), _T(A16_UINT, 16_UINT, NONE, WZYX), _T(A16_SINT, 16_SINT, NONE, WZYX), @@ -136,7 +136,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(R32_SINT, 32_SINT, R32_SINT, WZYX), V_(R32_USCALED, 32_UINT, NONE, WZYX), V_(R32_SSCALED, 32_UINT, NONE, WZYX), - VT(R32_FLOAT, 32_FLOAT, NONE, WZYX), + VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX), V_(R32_FIXED, 32_FIXED, NONE, WZYX), _T(A32_UINT, 32_UINT, NONE, WZYX), @@ -152,7 +152,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), - VT(R16G16_FLOAT, 16_16_FLOAT, NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX), _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), @@ -222,7 +222,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX), V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX), - VT(R32G32_FLOAT, 32_32_FLOAT, NONE, WZYX), + VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX), V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX), _T(L32A32_UINT, 32_32_UINT, NONE, WZYX), @@ -335,6 +335,8 @@ 
fd3_fs_output_format(enum pipe_format format) if (util_format_is_srgb(format)) return RB_R16G16B16A16_FLOAT; switch (format) { + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: case PIPE_FORMAT_R11G11B10_FLOAT: return RB_R16G16B16A16_FLOAT; default: From imirkin at kemper.freedesktop.org Sat Apr 18 22:56:21 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 18 Apr 2015 15:56:21 -0700 (PDT) Subject: Mesa (master): primconvert: select pv convention only from flatshade_first Message-ID: <20150418225621.85A7F761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1cdb01d716cb8112c67f8538f7d71d0765153eb2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1cdb01d716cb8112c67f8538f7d71d0765153eb2 Author: Ilia Mirkin Date: Tue Apr 7 11:43:52 2015 -0400 primconvert: select pv convention only from flatshade_first This should match to how drivers program hardware. flatshade relates to whether color inputs are interpolated, not the provoking vertex convention. Signed-off-by: Ilia Mirkin Reviewed-by: Rob Clark --- src/gallium/auxiliary/indices/u_primconvert.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/indices/u_primconvert.c b/src/gallium/auxiliary/indices/u_primconvert.c index 00e65aa..70d3e85 100644 --- a/src/gallium/auxiliary/indices/u_primconvert.c +++ b/src/gallium/auxiliary/indices/u_primconvert.c @@ -104,8 +104,7 @@ util_primconvert_save_rasterizer_state(struct primconvert_context *pc, * we would actually need to save/restore rasterizer state. As * it is, we just need to make note of the pv. */ - pc->api_pv = (rast->flatshade - && !rast->flatshade_first) ? PV_LAST : PV_FIRST; + pc->api_pv = rast->flatshade_first ? 
PV_FIRST : PV_LAST; } void From imirkin at kemper.freedesktop.org Sat Apr 18 22:56:21 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Sat, 18 Apr 2015 15:56:21 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: enable polymode setting with non-fill modes Message-ID: <20150418225621.7AE9F761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0904774af1ff9202863e7c935ff9bc076d533fb3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0904774af1ff9202863e7c935ff9bc076d533fb3 Author: Ilia Mirkin Date: Tue Apr 7 00:57:34 2015 -0400 freedreno/a3xx: enable polymode setting with non-fill modes Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index 345f688..94f6d6e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -82,6 +82,10 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + if (cso->fill_front != PIPE_POLYGON_MODE_FILL || + cso->fill_back != PIPE_POLYGON_MODE_FILL) + so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE; + if (cso->cull_face & PIPE_FACE_FRONT) so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; if (cso->cull_face & PIPE_FACE_BACK) From idr at kemper.freedesktop.org Sun Apr 19 23:30:47 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Sun, 19 Apr 2015 16:30:47 -0700 (PDT) Subject: Mesa (master): doc: Add GL_ARB_shader_image_size dependency for OpenGL ES 3.1 Message-ID: <20150419233047.F0573761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c015008ee07745d29dde3502c1e2b34a1127d33e URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c015008ee07745d29dde3502c1e2b34a1127d33e Author: Ian Romanick Date: Thu Apr 16 12:17:19 2015 -0700 doc: Add GL_ARB_shader_image_size dependency for OpenGL ES 3.1 imageSize() is in the GLSL ES 3.1 spec. Trivial. Signed-off-by: Ian Romanick --- docs/GL3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/GL3.txt b/docs/GL3.txt index 433a012..6cc08b9 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -220,6 +220,7 @@ GLES3.1, GLSL ES 3.1 GL_ARB_program_interface_query DONE (all drivers) GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_shader_image_load_store in progress (curro) + GL_ARB_shader_image_size not started GL_ARB_shader_storage_buffer_object not started GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) From daenzer at kemper.freedesktop.org Mon Apr 20 06:18:44 2015 From: daenzer at kemper.freedesktop.org (Michel Dänzer) Date: Sun, 19 Apr 2015 23:18:44 -0700 (PDT) Subject: Mesa (master): gallivm: Fix build against LLVM 3.7 SVN r235265 Message-ID: <20150420061844.22FA6761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 645f77fe50c1f04eb77d03b3feaaa5fd36d1d1a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=645f77fe50c1f04eb77d03b3feaaa5fd36d1d1a4 Author: Nick Sarnie Date: Sun Apr 19 23:51:26 2015 -0400 gallivm: Fix build against LLVM 3.7 SVN r235265 LLVM removed JITEmitDebugInfo from TargetOptions since they weren't used v2: Be consistent with the LLVM version check (Aaron Watry) Signed-off-by: Nick Sarnie Reviewed-and-Tested-by: Michel Dänzer --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 +- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 65d2896..be3e834 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -270,7 +270,7 @@ disassemble(const void* func, llvm::raw_ostream & Out) } TargetOptions options; -#if defined(DEBUG) +#if defined(DEBUG) && HAVE_LLVM < 0x0307 options.JITEmitDebugInfo = true; #endif #if defined(PIPE_ARCH_X86) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 4ede90b..5e8a634 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -429,7 +429,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT, #endif #endif -#if defined(DEBUG) +#if defined(DEBUG) && HAVE_LLVM < 0x0307 options.JITEmitDebugInfo = true; #endif From airlied at kemper.freedesktop.org Mon Apr 20 08:46:34 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Mon, 20 Apr 2015 01:46:34 -0700 (PDT) Subject: Mesa (master): docs/GL3.txt: update ARB_shader_subroutine status Message-ID: <20150420084634.9CB4F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3282e57bcfc44cce751afac94a421fc035d831f3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3282e57bcfc44cce751afac94a421fc035d831f3 Author: Dave Airlie Date: Mon Apr 20 18:41:52 2015 +1000 docs/GL3.txt: update ARB_shader_subroutine status Admit to having started working on this, I don't admit to ever finishing it Signed-off-by: Dave Airlie --- docs/GL3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 6cc08b9..2dbd987 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -111,7 +111,7 @@ GL 4.0, GLSL 4.00: - New overload resolution rules DONE GL_ARB_gpu_shader_fp64 DONE (nvc0, softpipe) GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_shader_subroutine not started + GL_ARB_shader_subroutine started (Dave) GL_ARB_tessellation_shader started (Chris, Ilia) GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) 
GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) From tstellar at kemper.freedesktop.org Mon Apr 20 19:18:41 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Mon, 20 Apr 2015 12:18:41 -0700 (PDT) Subject: Mesa (master): clover: remove pre llvm 3.5.0 compatibility code Message-ID: <20150420191841.670157626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c1485f4b7d044724b3dbc1011f3c3a8a53132010 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c1485f4b7d044724b3dbc1011f3c3a8a53132010 Author: EdB Date: Sun Apr 19 10:50:19 2015 +0200 clover: remove pre llvm 3.5.0 compatibility code Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- .../state_trackers/clover/llvm/invocation.cpp | 23 ++------------------ 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 4da62b9..e07d95b 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -29,21 +29,14 @@ #include #include #include -#if HAVE_LLVM < 0x0305 -#include -#else #include #include #include -#endif #include #include #include #include #include -#if HAVE_LLVM < 0x0305 -#include -#endif #if HAVE_LLVM >= 0x0307 #include #else @@ -328,9 +321,7 @@ namespace { llvm::Function *kernel = *I; export_list.push_back(kernel->getName().data()); } -#if HAVE_LLVM < 0x0305 - PM.add(new llvm::DataLayout(mod)); -#elif HAVE_LLVM < 0x0306 +#if HAVE_LLVM < 0x0306 PM.add(new llvm::DataLayoutPass(mod)); #elif HAVE_LLVM < 0x0307 PM.add(new llvm::DataLayoutPass()); @@ -356,11 +347,7 @@ namespace { compat::vector args; llvm::Function *kernel_func = mod->getFunction(kernel_name); -#if HAVE_LLVM < 0x0305 - llvm::DataLayout TD(kernel_func->getParent()->getDataLayout()); -#else - llvm::DataLayout TD(mod); -#endif + llvm::DataLayout TD(mod); for (llvm::Function::const_arg_iterator 
I = kernel_func->arg_begin(), E = kernel_func->arg_end(); I != E; ++I) { @@ -651,8 +638,6 @@ namespace { return m; } -#if HAVE_LLVM >= 0x0305 - void diagnostic_handler(const llvm::DiagnosticInfo &di, void *data) { if (di.getSeverity() == llvm::DS_Error) { @@ -667,8 +652,6 @@ namespace { } } -#endif - void init_targets() { static bool targets_initialized = false; @@ -721,9 +704,7 @@ clover::compile_program_llvm(const compat::string &source, llvm::LLVMContext llvm_ctx; unsigned optimization_level; -#if HAVE_LLVM >= 0x0305 llvm_ctx.setDiagnosticHandler(diagnostic_handler, &r_log); -#endif if (get_debug_flags() & DBG_CLC) debug_log(source, ".cl"); From tstellar at kemper.freedesktop.org Mon Apr 20 19:18:41 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Mon, 20 Apr 2015 12:18:41 -0700 (PDT) Subject: Mesa (master): clover: make llvm >= 3.5.0 and c++11 mandatory Message-ID: <20150420191841.5E3E9761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f39cd716189f4b025f9c04cb3426c6b25b9eaf46 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f39cd716189f4b025f9c04cb3426c6b25b9eaf46 Author: EdB Date: Mon Apr 20 20:28:09 2015 +0200 clover: make llvm >= 3.5.0 and c++11 mandatory Clover no longer compiles with llvm < 3.5.0 since e1d363b3. e1d363b3 implies c++11 and llvm 3.5.0 CXXFLAGS provided it. No one seems to have noticed it, it's now official.
Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- configure.ac | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/configure.ac b/configure.ac index 325b936..095e23e 100644 --- a/configure.ac +++ b/configure.ac @@ -1865,6 +1865,13 @@ strip_unwanted_llvm_flags() { -e 's/-fstack-protector-strong\>//g' } +llvm_check_version_for() { + if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt "${1}0${2}${3}"; then + AC_MSG_ERROR([LLVM $1.$2.$3 or newer is required for $4]) + fi +} + + if test -z "$with_gallium_drivers"; then enable_gallium_llvm=no @@ -1919,22 +1926,10 @@ if test "x$enable_gallium_llvm" = xyes; then fi if test "x$enable_opencl" = xyes; then + llvm_check_version_for "3" "5" "0" "opencl" + LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker instrumentation" - # LLVM 3.3 >= 177971 requires IRReader - if $LLVM_CONFIG --components | grep -qw 'irreader'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader" - fi - # LLVM 3.4 requires Option - if $LLVM_CONFIG --components | grep -qw 'option'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} option" - fi - # Current OpenCL/Clover and LLVM 3.5 require ObjCARCOpts and ProfileData - if $LLVM_CONFIG --components | grep -qw 'objcarcopts'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} objcarcopts" - fi - if $LLVM_CONFIG --components | grep -qw 'profiledata'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} profiledata" - fi + LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts profiledata" fi DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH" MESA_LLVM=1 @@ -2058,12 +2053,7 @@ radeon_llvm_check() { if test "x$enable_gallium_llvm" != "xyes"; then AC_MSG_ERROR([--enable-gallium-llvm is required when building $1]) fi - LLVM_REQUIRED_VERSION_MAJOR="3" - LLVM_REQUIRED_VERSION_MINOR="4" - LLVM_REQUIRED_VERSION_PATCH="2" - if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt 
"${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}${LLVM_REQUIRED_VERSION_PATCH}"; then - AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR.$LLVM_REQUIRED_VERSION_PATCH or newer is required for $1]) - fi + llvm_check_version_for "3" "4" "2" $1 if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then AC_MSG_ERROR([LLVM R600 Target not enabled. You can enable it when building the LLVM sources with the --enable-experimental-targets=R600 From nroberts at kemper.freedesktop.org Tue Apr 21 05:07:20 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Mon, 20 Apr 2015 22:07:20 -0700 (PDT) Subject: Mesa (master): i965/skl: Fix the qpitch value Message-ID: <20150421050720.340827600C@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7004632b28d8a31b16acc553a1fb31202767bd80 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7004632b28d8a31b16acc553a1fb31202767bd80 Author: Neil Roberts Date: Wed Feb 18 18:41:58 2015 +0000 i965/skl: Fix the qpitch value On Skylake the qpitch value is uploaded as part of the surface state so we don't need to add the extra rows that are done for other generations. However for 3D textures it needs to be aligned to the tile height and for depth/stencil textures it needs to be a multiple of 8. Unlike previous generations the qpitch is measured as a multiple of the block size for compressed surfaces. When the horizontal mipmap layout is used for 1D textures then the qpitch is measured in pixels instead of rows. v2: Align the depth/stencil textures to a multiple of 8 v3: Add an assert that ALL_SLICES_AT_EACH_LOD is not used. Ignore the vertical alignment when picking the qpitch for 1D_ARRAY textures. 
Reviewed-by: Ben Widawsky Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_tex_layout.c | 62 +++++++++++++++++++++---- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 10 ++-- 2 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 75b409c..440ba6c 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -301,24 +301,66 @@ static void brw_miptree_layout_texture_array(struct brw_context *brw, struct intel_mipmap_tree *mt) { - int h0, h1; unsigned height = mt->physical_height0; bool layout_1d = use_linear_1d_layout(brw, mt); - - h0 = ALIGN(mt->physical_height0, mt->align_h); - h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h); - if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) - mt->qpitch = h0; - else - mt->qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h); - - int physical_qpitch = mt->compressed ? mt->qpitch / 4 : mt->qpitch; + int physical_qpitch; if (layout_1d) gen9_miptree_layout_1d(mt); else brw_miptree_layout_2d(mt); + if (layout_1d) { + physical_qpitch = 1; + /* When using the horizontal layout the qpitch specifies the distance in + * pixels between array slices. The total_width is forced to be a + * multiple of the horizontal alignment in brw_miptree_layout_1d (in + * this case it's always 64). The vertical alignment is ignored. + */ + mt->qpitch = mt->total_width; + } else if (brw->gen >= 9) { + GLenum base_format; + + /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will + * effectively end up with a packed qpitch anyway whenever + * mt->first_level == mt->last_level. + */ + assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD); + + /* On Gen9 we can pick whatever qpitch we like as long as it's aligned + * to the vertical alignment so we don't need to add any extra rows. 
+ */ + mt->qpitch = mt->total_height; + + /* If the surface might be used as a stencil buffer or HiZ buffer then + * it needs to be a multiple of 8. + */ + base_format = _mesa_get_format_base_format(mt->format); + if (_mesa_is_depth_or_stencil_format(base_format)) + mt->qpitch = ALIGN(mt->qpitch, 8); + + /* 3D textures need to be aligned to the tile height. At this point we + * don't know which tiling will be used so let's just align it to 32 + */ + if (mt->target == GL_TEXTURE_3D) + mt->qpitch = ALIGN(mt->qpitch, 32); + + /* Unlike previous generations the qpitch is now a multiple of the + * compressed block size so physical_qpitch matches mt->qpitch. + */ + physical_qpitch = mt->qpitch; + } else { + int h0 = ALIGN(mt->physical_height0, mt->align_h); + int h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h); + + if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) + mt->qpitch = h0; + else + mt->qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h); + + physical_qpitch = mt->compressed ? mt->qpitch / 4 : mt->qpitch; + } + for (unsigned level = mt->first_level; level <= mt->last_level; level++) { unsigned img_height; img_height = ALIGN(height, mt->align_h); diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 0796059..77b0294 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -403,10 +403,14 @@ struct intel_mipmap_tree enum miptree_array_layout array_layout; /** - * The distance in rows between array slices in an uncompressed surface. + * The distance in between array slices. * - * For compressed surfaces, slices are stored closer together physically; - * the real distance is (qpitch / block height). + * The value is the one that is sent in the surface state. The actual + * meaning depends on certain criteria. Usually it is simply the number of + * uncompressed rows between each slice. 
However on Gen9+ for compressed + * surfaces it is the number of blocks. For 1D array surfaces that have the + * mipmap tree stored horizontally it is the number of pixels between each + * slice. */ uint32_t qpitch; From nroberts at kemper.freedesktop.org Tue Apr 21 05:07:20 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Mon, 20 Apr 2015 22:07:20 -0700 (PDT) Subject: Mesa (master): i965/skl: Don't use ALL_SLICES_AT_EACH_LOD Message-ID: <20150421050720.2D499761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 584f8e1ec56b45057b53e161233308f38e1c3b09 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=584f8e1ec56b45057b53e161233308f38e1c3b09 Author: Neil Roberts Date: Fri Feb 20 19:11:46 2015 +0000 i965/skl: Don't use ALL_SLICES_AT_EACH_LOD The render surface state command for Skylake doesn't have the surface array spacing bit so it's not possible to select this layout. I think it was only used in order to make it pick a tightly-packed qpitch value that doesn't include space for the mipmaps. However this won't be necessary after the next patch because it will automatically pick a packed qpitch value whenever first_level==last_level. It is better to remove this layout entirely on Gen8+ because although it can effectively be implemented with a small qpitch value when there are no mipmaps it isn't possible to support the case where there are mipmaps because in that case the layout is very different. It could be good to make a similar change for Gen8 if we also change the layouting code to pick the qpitch value in a similar way. 
v2: Make the commit message and comments more convincing Reviewed-by: Ben Widawsky Tested-by: Ben Widawsky --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 30 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 9e311f06..24a5c3d 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -388,19 +388,29 @@ intel_miptree_create_layout(struct brw_context *brw, } } - /* Set array_layout to ALL_SLICES_AT_EACH_LOD when gen7+ array_spacing_lod0 - * can be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces. + /* Set array_layout to ALL_SLICES_AT_EACH_LOD when array_spacing_lod0 can + * be used. array_spacing_lod0 is only used for non-IMS MSAA surfaces on + * Gen 7 and 8. On Gen 8 and 9 this layout is not available but it is still + * used on Gen8 to make it pick a qpitch value which doesn't include space + * for the mipmaps. On Gen9 this is not necessary because it will + * automatically pick a packed qpitch value whenever mt->first_level == + * mt->last_level. * TODO: can we use it elsewhere? 
+ * TODO: also disable this on Gen8 and pick the qpitch value like Gen9 */ - switch (mt->msaa_layout) { - case INTEL_MSAA_LAYOUT_NONE: - case INTEL_MSAA_LAYOUT_IMS: + if (brw->gen >= 9) { mt->array_layout = ALL_LOD_IN_EACH_SLICE; - break; - case INTEL_MSAA_LAYOUT_UMS: - case INTEL_MSAA_LAYOUT_CMS: - mt->array_layout = ALL_SLICES_AT_EACH_LOD; - break; + } else { + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_NONE: + case INTEL_MSAA_LAYOUT_IMS: + mt->array_layout = ALL_LOD_IN_EACH_SLICE; + break; + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + mt->array_layout = ALL_SLICES_AT_EACH_LOD; + break; + } } if (target == GL_TEXTURE_CUBE_MAP) { From tpalli at kemper.freedesktop.org Tue Apr 21 11:40:22 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Tue, 21 Apr 2015 04:40:22 -0700 (PDT) Subject: Mesa (master): mesa: add missing break in switch statement Message-ID: <20150421114022.C106D761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ad5ae271e7514e35e97c58f032b2949e3eee62c3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad5ae271e7514e35e97c58f032b2949e3eee62c3 Author: Tapani Pälli Date: Tue Apr 21 08:26:03 2015 +0300 mesa: add missing break in switch statement Signed-off-by: Tapani Pälli Reviewed-By: Martin Peres --- src/mesa/main/shader_query.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 1428058..336598d 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -634,7 +634,7 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg, case GL_ATOMIC_COUNTER_BUFFER: if (_mesa_program_resource_index(shProg, res) == index) return res; - + break; case GL_TRANSFORM_FEEDBACK_VARYING: case GL_PROGRAM_INPUT: case GL_PROGRAM_OUTPUT: From tpalli at kemper.freedesktop.org Tue Apr 21 11:40:22 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Tue, 21 Apr 2015 04:40:22 -0700 (PDT)
Subject: Mesa (master): mesa: fix UBO queries for active uniforms Message-ID: <20150421114022.B3A3F761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 054c7dc7eb091e631a01ade3e6a46d6cc77fc9f3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=054c7dc7eb091e631a01ade3e6a46d6cc77fc9f3 Author: Tapani Pälli Date: Mon Apr 20 15:41:06 2015 +0300 mesa: fix UBO queries for active uniforms Commit 34df5eb introduced regression to GetActiveUniformBlockiv when querying one of the following properties: GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES Implementation counted all uniforms in ubo directly while query should check first if the uniform in question is _active_. Signed-off-by: Tapani Pälli Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90109 Reviewed-By: Martin Peres --- src/mesa/main/shader_query.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index b5f1d08..1428058 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -860,13 +860,23 @@ get_buffer_property(struct gl_shader_program *shProg, *val = RESOURCE_UBO(res)->UniformBufferSize; return 1; case GL_NUM_ACTIVE_VARIABLES: - *val = RESOURCE_UBO(res)->NumUniforms; + *val = 0; + for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { + const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; + struct gl_program_resource *uni = + _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname); + if (!uni) + continue; + (*val)++; + } return 1; case GL_ACTIVE_VARIABLES: for (unsigned i = 0; i < RESOURCE_UBO(res)->NumUniforms; i++) { const char *iname = RESOURCE_UBO(res)->Uniforms[i].IndexName; struct gl_program_resource *uni = _mesa_program_resource_find_name(shProg, GL_UNIFORM, iname); + if (!uni) + continue; *val++ = _mesa_program_resource_index(shProg, uni); } From tpalli at kemper.freedesktop.org Tue Apr 21 11:40:22 2015 From: tpalli
at kemper.freedesktop.org (Tapani Pälli) Date: Tue, 21 Apr 2015 04:40:22 -0700 (PDT) Subject: Mesa (master): glsl: add fallthrough comment on switch Message-ID: <20150421114022.BA3547626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5917ca349ae88b35779179a6a0d949ec910bc680 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5917ca349ae88b35779179a6a0d949ec910bc680 Author: Tapani Pälli Date: Tue Apr 21 08:25:16 2015 +0300 glsl: add fallthrough comment on switch Signed-off-by: Tapani Pälli Reviewed-By: Martin Peres --- src/glsl/linker.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index b6baa5d..9aa1389 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2570,7 +2570,8 @@ add_interface_variables(struct gl_shader_program *shProg, if (var->data.location != SYSTEM_VALUE_VERTEX_ID && var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && var->data.location != SYSTEM_VALUE_INSTANCE_ID) - continue; + continue; + /* FALLTHROUGH */ case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) continue; From evelikov at kemper.freedesktop.org Tue Apr 21 12:09:08 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Tue, 21 Apr 2015 05:09:08 -0700 (PDT) Subject: Mesa (master): main: remove __FUNCTION__ defined because it is obsolete Message-ID: <20150421120908.F280B7626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 958b4965a23f69085dc04df165d6fccc2707d60e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=958b4965a23f69085dc04df165d6fccc2707d60e Author: Marius Predut Date: Thu Apr 16 13:36:07 2015 +0300 main: remove __FUNCTION__ defined because it is obsolete Consistently just use C99's __func__ everywhere. No functional changes.
Signed-off-by: Marius Predut Reviewed-by: Emil Velikov --- src/mesa/main/compiler.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/mesa/main/compiler.h b/src/mesa/main/compiler.h index 55152fd..93b4e6f 100644 --- a/src/mesa/main/compiler.h +++ b/src/mesa/main/compiler.h @@ -60,11 +60,6 @@ extern "C" { #endif -/* XXX: Use standard `__func__` instead */ -#ifndef __FUNCTION__ -# define __FUNCTION__ __func__ -#endif - /** * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32. * Do not use these unless absolutely necessary! From evelikov at kemper.freedesktop.org Tue Apr 21 12:09:08 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Tue, 21 Apr 2015 05:09:08 -0700 (PDT) Subject: Mesa (master): radeon: replace __FUNCTION__ with __func__ Message-ID: <20150421120908.EA7AE761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d8b14a57a98f4bad6528eda8dd1406c15bdcce75 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8b14a57a98f4bad6528eda8dd1406c15bdcce75 Author: Marius Predut Date: Thu Apr 16 13:35:50 2015 +0300 radeon: replace __FUNCTION__ with __func__ Consistently just use C99's __func__ everywhere. No functional changes. 
Signed-off-by: Marius Predut Acked-by: Michel Dänzer Reviewed-by: Emil Velikov --- src/mesa/drivers/dri/r200/r200_blit.c | 2 +- src/mesa/drivers/dri/r200/r200_cmdbuf.c | 10 +++---- src/mesa/drivers/dri/r200/r200_ioctl.c | 2 +- src/mesa/drivers/dri/r200/r200_sanity.c | 2 +- src/mesa/drivers/dri/r200/r200_state.c | 22 +++++++-------- src/mesa/drivers/dri/r200/r200_state.h | 2 +- src/mesa/drivers/dri/r200/r200_swtcl.c | 2 +- src/mesa/drivers/dri/r200/r200_tcl.c | 4 +-- src/mesa/drivers/dri/r200/r200_tex.c | 14 +++++----- src/mesa/drivers/dri/r200/r200_texstate.c | 6 ++--- src/mesa/drivers/dri/r200/r200_vertprog.c | 16 +++++------ src/mesa/drivers/dri/radeon/radeon_blit.c | 2 +- src/mesa/drivers/dri/radeon/radeon_cmdbuf.h | 6 ++--- src/mesa/drivers/dri/radeon/radeon_common.c | 12 ++++----- .../drivers/dri/radeon/radeon_common_context.c | 10 +++---- src/mesa/drivers/dri/radeon/radeon_debug.h | 2 +- src/mesa/drivers/dri/radeon/radeon_dma.c | 28 ++++++++++---------- src/mesa/drivers/dri/radeon/radeon_fbo.c | 2 +- src/mesa/drivers/dri/radeon/radeon_ioctl.c | 14 +++++----- src/mesa/drivers/dri/radeon/radeon_maos_arrays.c | 8 +++--- src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h | 2 +- src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c | 6 ++--- src/mesa/drivers/dri/radeon/radeon_queryobj.c | 16 +++++------ src/mesa/drivers/dri/radeon/radeon_screen.c | 2 +- src/mesa/drivers/dri/radeon/radeon_state.c | 12 ++++----- src/mesa/drivers/dri/radeon/radeon_state.h | 2 +- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 4 +-- src/mesa/drivers/dri/radeon/radeon_tcl.c | 4 +-- src/mesa/drivers/dri/radeon/radeon_tex.c | 10 +++---- src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 2 +- src/mesa/drivers/dri/radeon/radeon_texstate.c | 4 +-- src/mesa/drivers/dri/radeon/radeon_texture.c | 2 +- 32 files changed, 116 insertions(+), 116 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c index a33f338..3adc694 100644 ---
a/src/mesa/drivers/dri/r200/r200_blit.c +++ b/src/mesa/drivers/dri/r200/r200_blit.c @@ -527,7 +527,7 @@ unsigned r200_blit(struct gl_context *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(&r200->radeon.glCtx); - rcommonEnsureCmdBufSpace(&r200->radeon, 102, __FUNCTION__); + rcommonEnsureCmdBufSpace(&r200->radeon, 102, __func__); if (!validate_buffers(r200, src_bo, dst_bo)) return GL_FALSE; diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index 13ac5af..83846d6 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -124,7 +124,7 @@ void r200EmitVbufPrim( r200ContextPtr rmesa, radeonEmitState(&rmesa->radeon); radeon_print(RADEON_RENDER|RADEON_SWRENDER,RADEON_VERBOSE, - "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, + "%s cmd_used/4: %d prim %x nr %d\n", __func__, rmesa->store.cmd_used/4, primitive, vertex_nr); BEGIN_BATCH(3); @@ -162,7 +162,7 @@ void r200FlushElts(struct gl_context *ctx) r200ContextPtr rmesa = R200_CONTEXT(ctx); int nr, elt_used = rmesa->tcl.elt_used; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __func__, rmesa->tcl.hw_primitive, elt_used); assert( rmesa->radeon.dma.flush == r200FlushElts ); rmesa->radeon.dma.flush = NULL; @@ -187,7 +187,7 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, { GLushort *retval; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __func__, min_nr, primitive); assert((primitive & R200_VF_PRIM_WALK_IND)); @@ -225,7 +225,7 @@ void r200EmitVertexAOS( r200ContextPtr rmesa, BATCH_LOCALS(&rmesa->radeon); radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s: vertex_size 0x%x offset 0x%x \n", - __FUNCTION__, vertex_size, offset); + __func__, vertex_size, 
offset); BEGIN_BATCH(7); @@ -245,7 +245,7 @@ void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset) radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: nr=%d, ofs=0x%08x\n", - __FUNCTION__, nr, offset); + __func__, nr, offset); BEGIN_BATCH(sz+2+ (nr*2)); OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1); diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index d665c8b..90232d4 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -68,7 +68,7 @@ static void r200Clear( struct gl_context *ctx, GLbitfield mask ) if ( swmask ) { if (R200_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swmask); + fprintf(stderr, "%s: swrast clear, mask: %x\n", __func__, swmask); _swrast_Clear( ctx, swmask ); } diff --git a/src/mesa/drivers/dri/r200/r200_sanity.c b/src/mesa/drivers/dri/r200/r200_sanity.c index ab922e5..6628674 100644 --- a/src/mesa/drivers/dri/r200/r200_sanity.c +++ b/src/mesa/drivers/dri/r200/r200_sanity.c @@ -1447,7 +1447,7 @@ int r200SanityCmdBuffer( r200ContextPtr rmesa, } } - fprintf(stderr, "leaving %s\n\n\n", __FUNCTION__); + fprintf(stderr, "leaving %s\n\n\n", __func__); return 0; } diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index e4f07b3..b0a6bd5 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -261,7 +261,7 @@ static void r200_set_blend_state( struct gl_context * ctx ) default: fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB ); + __func__, __LINE__, ctx->Color.Blend[0].EquationRGB ); return; } @@ -295,7 +295,7 @@ static void r200_set_blend_state( struct gl_context * ctx ) default: fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA ); + __func__, __LINE__, 
ctx->Color.Blend[0].EquationA ); return; } @@ -723,7 +723,7 @@ static void r200PolygonOffset( struct gl_context *ctx, /* factor *= 2; */ /* constant *= 2; */ -/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */ +/* fprintf(stderr, "%s f:%f u:%f\n", __func__, factor, constant); */ R200_STATECHANGE( rmesa, zbs ); rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR] = factoru.ui32; @@ -867,7 +867,7 @@ static void update_light_colors( struct gl_context *ctx, GLuint p ) { struct gl_light *l = &ctx->Light.Light[p]; -/* fprintf(stderr, "%s\n", __FUNCTION__); */ +/* fprintf(stderr, "%s\n", __func__); */ if (l->Enabled) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -996,7 +996,7 @@ void r200UpdateMaterial( struct gl_context *ctx ) mask &= ~ctx->Light._ColorMaterialBitmask; if (R200_DEBUG & RADEON_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); if (mask & MAT_BIT_FRONT_EMISSION) { fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0]; @@ -1668,7 +1668,7 @@ static void r200Enable( struct gl_context *ctx, GLenum cap, GLboolean state ) GLuint p, flag; if ( R200_DEBUG & RADEON_STATE ) - fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, + fprintf( stderr, "%s( %s = %s )\n", __func__, _mesa_lookup_enum_by_nr( cap ), state ? 
"GL_TRUE" : "GL_FALSE" ); @@ -2050,7 +2050,7 @@ void r200LightingSpaceChange( struct gl_context *ctx ) GLboolean tmp; if (R200_DEBUG & RADEON_STATE) - fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + fprintf(stderr, "%s %d BEFORE %x\n", __func__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); if (ctx->_NeedEyeCoords) @@ -2066,7 +2066,7 @@ void r200LightingSpaceChange( struct gl_context *ctx ) } if (R200_DEBUG & RADEON_STATE) - fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + fprintf(stderr, "%s %d AFTER %x\n", __func__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); } @@ -2109,7 +2109,7 @@ static void update_texturematrix( struct gl_context *ctx ) int unit; if (R200_DEBUG & RADEON_STATE) - fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__, + fprintf(stderr, "%s before COMPSEL: %x\n", __func__, rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]); rmesa->TexMatEnabled = 0; @@ -2167,7 +2167,7 @@ GLboolean r200ValidateBuffers(struct gl_context *ctx) int i, ret; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs); rrb = radeon_get_colorbuffer(&rmesa->radeon); @@ -2317,7 +2317,7 @@ static void r200WrapRunPipeline( struct gl_context *ctx ) GLboolean has_material; if (0) - fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); + fprintf(stderr, "%s, newstate: %x\n", __func__, rmesa->radeon.NewGLState); /* Validate state: */ diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index 9111981..3b719b5 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -54,7 +54,7 @@ extern void r200_vtbl_update_scissor( struct gl_context *ctx ); extern void r200Fallback( struct gl_context *ctx, GLuint bit, GLboolean mode ); #define FALLBACK( rmesa, bit, mode ) do { \ if ( 0 ) fprintf( stderr, 
"FALLBACK in %s: #%d=%d\n", \ - __FUNCTION__, bit, mode ); \ + __func__, bit, mode ); \ r200Fallback( &rmesa->radeon.glCtx, bit, mode ); \ } while (0) diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index e8a06c8..c39b71d 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -210,7 +210,7 @@ static void r200_predict_emit_size( r200ContextPtr rmesa ) if (rcommonEnsureCmdBufSpace(&rmesa->radeon, state_size + vertex_array_size + prim_size, - __FUNCTION__)) + __func__)) rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon); else rmesa->radeon.swtcl.emit_prediction = state_size; diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index ccc8f42..3eccaa7 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -340,7 +340,7 @@ static GLuint r200EnsureEmitSize( struct gl_context * ctx , GLubyte* vimap_rev ) "%s space %u, aos %d\n", __func__, space_required, AOS_BUFSZ(nr_aos) ); /* flush the buffer in case we need more than is left. 
*/ - if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__)) + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __func__)) return space_required + radeonCountStateEmitSize( &rmesa->radeon ); else return space_required + state_size; @@ -374,7 +374,7 @@ static GLboolean r200_run_tcl_render( struct gl_context *ctx, if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ - radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__); + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __func__); if (VB->Count == 0) return GL_FALSE; diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 17ae7b9..083a184 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -104,7 +104,7 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu is_clamp_to_border = GL_TRUE; break; default: - _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + _mesa_problem(NULL, "bad S wrap mode in %s", __func__); } if (tObj->Target != GL_TEXTURE_1D) { @@ -138,7 +138,7 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu is_clamp_to_border = GL_TRUE; break; default: - _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + _mesa_problem(NULL, "bad T wrap mode in %s", __func__); } } @@ -174,7 +174,7 @@ static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenu is_clamp_to_border = GL_TRUE; break; default: - _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__); + _mesa_problem(NULL, "bad R wrap mode in %s", __func__); } if ( is_clamp_to_border ) { @@ -302,7 +302,7 @@ static void r200TexEnv( struct gl_context *ctx, GLenum target, struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n", - __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); + __func__, 
_mesa_lookup_enum_by_nr( pname ) ); /* This is incorrect: Need to maintain this data for each of * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch @@ -383,7 +383,7 @@ static void r200TexParameter( struct gl_context *ctx, radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s(%p, tex %p) pname %s\n", - __FUNCTION__, ctx, texObj, + __func__, ctx, texObj, _mesa_lookup_enum_by_nr( pname ) ); switch ( pname ) { @@ -413,7 +413,7 @@ static void r200DeleteTexture(struct gl_context * ctx, struct gl_texture_object radeonTexObj* t = radeon_tex_obj(texObj); radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL, - "%s( %p (target = %s) )\n", __FUNCTION__, + "%s( %p (target = %s) )\n", __func__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); @@ -472,7 +472,7 @@ static struct gl_texture_object *r200NewTextureObject(struct gl_context * ctx, radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, "%s(%p) target %s, new texture %p.\n", - __FUNCTION__, ctx, + __func__, ctx, _mesa_lookup_enum_by_nr(target), t); _mesa_initialize_texture_object(ctx, &t->base, name, target); diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 43ecdb9..cc13ccb 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -297,7 +297,7 @@ static GLboolean r200UpdateTextureEnv( struct gl_context *ctx, int unit, int slo R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK); if ( R200_DEBUG & RADEON_TEXTURE ) { - fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); + fprintf( stderr, "%s( %p, %d )\n", __func__, (void *)ctx, unit ); } /* Set the texture environment state. Isn't this nice and clean? 
@@ -1182,7 +1182,7 @@ static GLboolean r200_validate_texgen( struct gl_context *ctx, GLuint unit ) (unit * 4)); if (0) - fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit); + fprintf(stderr, "%s unit %d\n", __func__, unit); if (texUnit->TexGenEnabled & S_BIT) { mode = texUnit->GenS.Mode; @@ -1392,7 +1392,7 @@ static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) } else { _mesa_problem(NULL, "unexpected texture format in %s", - __FUNCTION__); + __func__); return; } } diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index fdfd27e..d43eaf9 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -115,7 +115,7 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ paramList = mesa_vp->Base.Parameters; if(paramList->NumParameters > R200_VSF_MAX_PARAM){ - fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); + fprintf(stderr, "%s:Params exhausted\n", __func__); return GL_FALSE; } @@ -130,7 +130,7 @@ static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_ *fcmd++ = paramList->ParameterValues[pi][3].f; break; default: - _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); + _mesa_problem(NULL, "Bad param type in %s", __func__); break; } if (pi == 95) { @@ -186,7 +186,7 @@ static unsigned long t_dst(struct prog_dst_register *dst) case VARYING_SLOT_PSIZ: return R200_VSF_OUT_CLASS_RESULT_POINTSIZE; default: - fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index); + fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __func__, dst->Index); exit(0); return 0; } @@ -194,7 +194,7 @@ static unsigned long t_dst(struct prog_dst_register *dst) assert (dst->Index == 0); return R200_VSF_OUT_CLASS_ADDR; default: - fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File); + fprintf(stderr, "problem in %s, unknown register type %d\n", 
__func__, dst->File); exit(0); return 0; } @@ -218,7 +218,7 @@ static unsigned long t_src_class(gl_register_file file) case PROGRAM_ADDRESS: */ default: - fprintf(stderr, "problem in %s", __FUNCTION__); + fprintf(stderr, "problem in %s", __func__); exit(0); } } @@ -235,7 +235,7 @@ static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller) int i; if(vp == NULL){ - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); + fprintf(stderr, "vp null in call to %s from %s\n", __func__, caller); return ; } @@ -263,7 +263,7 @@ static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src vp->inputs[src->Index] = max_reg+1;*/ - //vp_dump_inputs(vp, __FUNCTION__); + //vp_dump_inputs(vp, __func__); assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { @@ -325,7 +325,7 @@ static unsigned long t_opcode(enum prog_opcode opcode) case OPCODE_SLT: return R200_VPI_OUT_OP_SLT; default: - fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); + fprintf(stderr, "%s: Should not be called with opcode %d!", __func__, opcode); } exit(-1); return 0; diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c index 8c6a086..0de1751 100644 --- a/src/mesa/drivers/dri/radeon/radeon_blit.c +++ b/src/mesa/drivers/dri/radeon/radeon_blit.c @@ -405,7 +405,7 @@ unsigned r100_blit(struct gl_context *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__); + rcommonEnsureCmdBufSpace(&r100->radeon, 59, __func__); if (!validate_buffers(r100, src_bo, dst_bo)) return GL_FALSE; diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h index 5f454cb..da179a7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h +++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h @@ -31,7 +31,7 @@ void rcommonBeginBatch(radeonContextPtr rmesa, * 
Prepare writing n dwords to the command buffer. Does not cause automatic * state emits. */ -#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, __FILE__, __FUNCTION__, __LINE__) +#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, __FILE__, __func__, __LINE__) /** * Write one dword to the command buffer. @@ -49,7 +49,7 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int __offset = (offset); \ if (0 && __offset) { \ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ - __FILE__, __FUNCTION__, __LINE__, __offset); \ + __FILE__, __func__, __LINE__, __offset); \ } \ radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset); \ radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ @@ -72,7 +72,7 @@ void rcommonBeginBatch(radeonContextPtr rmesa, */ #define END_BATCH() \ do { \ - radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\ + radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __func__, __LINE__);\ } while(0) /** diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 3bcc7f2..0ca526d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -167,7 +167,7 @@ uint32_t radeonGetAge(radeonContextPtr radeon) ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); if (ret) { - fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __func__, ret); exit(1); } @@ -342,7 +342,7 @@ void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb) void radeonDrawBuffer( struct gl_context *ctx, GLenum mode ) { if (RADEON_DEBUG & RADEON_DRI) - fprintf(stderr, "%s %s\n", __FUNCTION__, + fprintf(stderr, "%s %s\n", __func__, _mesa_lookup_enum_by_nr( mode )); if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { @@ -531,7 +531,7 @@ static inline void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll) void radeonEmitState(radeonContextPtr radeon) { - radeon_print(RADEON_STATE, 
RADEON_NORMAL, "%s\n", __FUNCTION__); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __func__); if (radeon->vtbl.pre_emit_state) radeon->vtbl.pre_emit_state(radeon); @@ -562,7 +562,7 @@ void radeonFlush(struct gl_context *ctx) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); + fprintf(stderr, "%s %d\n", __func__, radeon->cmdbuf.cs->cdw); /* okay if we have no cmds in the buffer && we have no DMA flush && @@ -576,7 +576,7 @@ void radeonFlush(struct gl_context *ctx) radeon->dma.flush( ctx ); if (radeon->cmdbuf.cs->cdw) - rcommonFlushCmdBuf(radeon, __FUNCTION__); + rcommonFlushCmdBuf(radeon, __func__); flush_front: if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && radeon->front_buffer_dirty) { @@ -635,7 +635,7 @@ int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller) rmesa->cmdbuf.flushing = 1; if (RADEON_DEBUG & RADEON_IOCTL) { - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); + fprintf(stderr, "%s from %s\n", __func__, caller); } radeonEmitQueryEnd(&rmesa->glCtx); diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index f28f380..9699dcb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -236,7 +236,7 @@ void radeonDestroyContext(__DRIcontext *driContextPriv ) radeon_firevertices(radeon); if (!is_empty_list(&radeon->dma.reserved)) { - rcommonFlushCmdBuf( radeon, __FUNCTION__ ); + rcommonFlushCmdBuf( radeon, __func__ ); } radeonFreeDmaRegions(radeon); @@ -276,7 +276,7 @@ GLboolean radeonUnbindContext(__DRIcontext * driContextPriv) radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; if (RADEON_DEBUG & RADEON_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, + fprintf(stderr, "%s ctx %p\n", __func__, &radeon->glCtx); /* Unset current context and dispath table */ @@ -578,7 +578,7 @@ 
GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, if (!driContextPriv) { if (RADEON_DEBUG & RADEON_DRI) - fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + fprintf(stderr, "%s ctx is null\n", __func__); _mesa_make_current(NULL, NULL, NULL); return GL_TRUE; } @@ -602,7 +602,7 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, &(radeon_get_renderbuffer(drfb, BUFFER_DEPTH)->base.Base)); if (RADEON_DEBUG & RADEON_DRI) - fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, &radeon->glCtx, drfb, readfb); + fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __func__, &radeon->glCtx, drfb, readfb); if(driDrawPriv) driUpdateFramebufferSize(&radeon->glCtx, driDrawPriv); @@ -625,7 +625,7 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, if (RADEON_DEBUG & RADEON_DRI) - fprintf(stderr, "End %s\n", __FUNCTION__); + fprintf(stderr, "End %s\n", __func__); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.h b/src/mesa/drivers/dri/radeon/radeon_debug.h index b2436eb..df2f1ab 100644 --- a/src/mesa/drivers/dri/radeon/radeon_debug.h +++ b/src/mesa/drivers/dri/radeon/radeon_debug.h @@ -156,7 +156,7 @@ static inline void radeon_debug_remove_indent(void) if(__warn_once){ \ radeon_warning("*********************************WARN_ONCE*********************************\n"); \ radeon_warning("File %s function %s line %d\n", \ - __FILE__, __FUNCTION__, __LINE__); \ + __FILE__, __func__, __LINE__); \ radeon_warning(__VA_ARGS__);\ radeon_warning("***************************************************************************\n"); \ __warn_once=0;\ diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index 5b98eff..4c1c5d0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -61,7 +61,7 @@ void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count) if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data 
%p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); + __func__, count, stride, (void *)out, (void *)data); if (stride == 4) COPY_DWORDS(out, data, count); @@ -79,7 +79,7 @@ void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count) if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); + __func__, count, stride, (void *)out, (void *)data); if (stride == 8) COPY_DWORDS(out, data, count * 2); @@ -98,7 +98,7 @@ void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count) if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); + __func__, count, stride, (void *)out, (void *)data); if (stride == 12) { COPY_DWORDS(out, data, count * 3); @@ -119,7 +119,7 @@ void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count) if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); + __func__, count, stride, (void *)out, (void *)data); if (stride == 16) COPY_DWORDS(out, data, count * 4); @@ -176,7 +176,7 @@ void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos, if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + __func__, count, stride); if (stride == 0) { radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 ); @@ -218,7 +218,7 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) rmesa->dma.minimum_size = (size + 15) & (~15); radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n", - __FUNCTION__, size, rmesa->dma.minimum_size); + __func__, size, rmesa->dma.minimum_size); if (is_empty_list(&rmesa->dma.free) || last_elem(&rmesa->dma.free)->bo->size < size) { @@ -231,7 +231,7 @@ again_alloc: RADEON_GEM_DOMAIN_GTT, 
0); if (!dma_bo->bo) { - rcommonFlushCmdBuf(rmesa, __FUNCTION__); + rcommonFlushCmdBuf(rmesa, __func__); goto again_alloc; } insert_at_head(&rmesa->dma.reserved, dma_bo); @@ -267,7 +267,7 @@ void radeonAllocDmaRegion(radeonContextPtr rmesa, int bytes, int alignment) { if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + fprintf(stderr, "%s %d\n", __func__, bytes); if (rmesa->dma.flush) rmesa->dma.flush(&rmesa->glCtx); @@ -297,7 +297,7 @@ void radeonFreeDmaRegions(radeonContextPtr rmesa) struct radeon_dma_bo *dma_bo; struct radeon_dma_bo *temp; if (RADEON_DEBUG & RADEON_DMA) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); foreach_s(dma_bo, temp, &rmesa->dma.free) { remove_from_list(dma_bo); @@ -324,7 +324,7 @@ void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes) return; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes); + fprintf(stderr, "%s %d\n", __func__, return_bytes); rmesa->dma.current_used -= return_bytes; rmesa->dma.current_vertexptr = rmesa->dma.current_used; } @@ -361,7 +361,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) ++reserved; fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n", - __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size); + __func__, free, wait, reserved, rmesa->dma.minimum_size); } /* move waiting bos to free list. 
@@ -424,7 +424,7 @@ void rcommon_flush_last_swtcl_prim( struct gl_context *ctx ) struct radeon_dma *dma = &rmesa->dma; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); dma->flush = NULL; radeon_bo_unmap(rmesa->swtcl.bo); @@ -454,7 +454,7 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) GLuint bytes = vsize * nverts; void *head; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); if(is_empty_list(&rmesa->dma.reserved) ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) { @@ -496,7 +496,7 @@ void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs ) radeonContextPtr radeon = RADEON_CONTEXT( ctx ); int i; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); if (radeon->dma.flush) { radeon->dma.flush(&radeon->glCtx); diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 110b030..97022f9 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -856,7 +856,7 @@ radeon_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s: HW doesn't support format %s as output format of attachment %d\n", - __FUNCTION__, _mesa_get_format_name(mesa_format), i); + __func__, _mesa_get_format_name(mesa_format), i); return; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index 3b853e3..58fb9f1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -173,7 +173,7 @@ void radeonFlushElts( struct gl_context *ctx ) int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw); if (RADEON_DEBUG & RADEON_IOCTL) - 
fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); assert( rmesa->radeon.dma.flush == radeonFlushElts ); rmesa->radeon.dma.flush = NULL; @@ -205,7 +205,7 @@ void radeonFlushElts( struct gl_context *ctx ) END_BATCH(); if (RADEON_DEBUG & RADEON_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__); + fprintf(stderr, "%s: Syncing\n", __func__); radeonFinish( &rmesa->radeon.glCtx ); } @@ -221,7 +221,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, BATCH_LOCALS(&rmesa->radeon); if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); + fprintf(stderr, "%s %d prim %x\n", __func__, min_nr, primitive); assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); @@ -262,7 +262,7 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, if (RADEON_DEBUG & RADEON_RENDER) fprintf(stderr, "%s: header prim %x \n", - __FUNCTION__, primitive); + __func__, primitive); assert(!rmesa->radeon.dma.flush); rmesa->radeon.glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES; @@ -284,7 +284,7 @@ void radeonEmitVertexAOS( r100ContextPtr rmesa, if (RADEON_DEBUG & (RADEON_PRIMS|RADEON_IOCTL)) fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", - __FUNCTION__, vertex_size, offset); + __func__, vertex_size, offset); BEGIN_BATCH(7); OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2); @@ -315,7 +315,7 @@ void radeonEmitAOS( r100ContextPtr rmesa, int i; if (RADEON_DEBUG & RADEON_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); BEGIN_BATCH(sz+2+(nr * 2)); OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); @@ -399,7 +399,7 @@ static void radeonClear( struct gl_context *ctx, GLbitfield mask ) if ( swmask ) { if (RADEON_DEBUG & RADEON_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swmask); + fprintf(stderr, "%s: swrast clear, mask: %x\n", __func__, swmask); _swrast_Clear( ctx, swmask ); } diff --git 
a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c index 6104a21..b6520c3 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c @@ -53,7 +53,7 @@ static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count) int i; if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + __func__, count, stride); for (i = 0; i < count; i++) { out[0] = *(int *)data; @@ -69,7 +69,7 @@ static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count) if (RADEON_DEBUG & RADEON_VERTS) fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); + __func__, count, stride); for (i = 0; i < count; i++) { out[0] = *(int *)data; @@ -88,7 +88,7 @@ static void emit_tex_vector(struct gl_context *ctx, struct radeon_aos *aos, uint32_t *out; if (RADEON_DEBUG & RADEON_VERTS) - fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); + fprintf(stderr, "%s %d/%d\n", __func__, count, size); switch (size) { case 4: emitsize = 3; break; @@ -152,7 +152,7 @@ void radeonEmitArrays( struct gl_context *ctx, GLuint inputs ) #if 0 if (RADEON_DEBUG & RADEON_VERTS) - _tnl_print_vert_flags( __FUNCTION__, inputs ); + _tnl_print_vert_flags( __func__, inputs ); #endif if (1) { diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h index b89f187..c4ffef1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h +++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h @@ -54,7 +54,7 @@ static void TAG(emit)( struct gl_context *ctx, union emit_union *v = (union emit_union *)dest; - radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__); + radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__); coord = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_POS]->data; coord_stride = VB->AttribPtr[_TNL_ATTRIB_POS]->stride; diff --git 
a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index 363f9e0..28591ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -522,7 +522,7 @@ int radeon_validate_texture_miptree(struct gl_context * ctx, radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: Validating texture %p now, minLod = %d, maxLod = %d\n", - __FUNCTION__, texObj ,t->minLod, t->maxLod); + __func__, texObj ,t->minLod, t->maxLod); dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base._MaxLevel); @@ -531,12 +531,12 @@ int radeon_validate_texture_miptree(struct gl_context * ctx, radeon_try_alloc_miptree(rmesa, t); radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: No matching miptree found, allocated new one %p\n", - __FUNCTION__, t->mt); + __func__, t->mt); } else { radeon_miptree_reference(dst_miptree, &t->mt); radeon_print(RADEON_TEXTURE, RADEON_NORMAL, - "%s: Using miptree %p\n", __FUNCTION__, t->mt); + "%s: Using miptree %p\n", __func__, t->mt); } const unsigned faces = _mesa_num_tex_faces(texObj->Target); diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c index fe4aa4a..c5fbc60 100644 --- a/src/mesa/drivers/dri/radeon/radeon_queryobj.c +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c @@ -41,7 +41,7 @@ static void radeonQueryGetResult(struct gl_context *ctx, struct gl_query_object radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, result %d\n", - __FUNCTION__, query->Base.Id, (int) query->Base.Result); + __func__, query->Base.Id, (int) query->Base.Result); radeon_bo_map(query->bo, GL_FALSE); result = query->bo->ptr; @@ -66,7 +66,7 @@ static struct gl_query_object * radeonNewQueryObject(struct gl_context *ctx, GLu query->Base.Active = GL_FALSE; query->Base.Ready = GL_TRUE; - radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id); + radeon_print(RADEON_STATE, 
RADEON_VERBOSE,"%s: query id %d\n", __func__, query->Base.Id); return &query->Base; } @@ -75,7 +75,7 @@ static void radeonDeleteQuery(struct gl_context *ctx, struct gl_query_object *q) { struct radeon_query_object *query = (struct radeon_query_object *)q; - radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __func__, q->Id); if (query->bo) { radeon_bo_unref(query->bo); @@ -93,7 +93,7 @@ static void radeonWaitQuery(struct gl_context *ctx, struct gl_query_object *q) if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) ctx->Driver.Flush(ctx); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __func__, q->Id, query->bo, query->curr_offset); radeonQueryGetResult(ctx, q); @@ -106,7 +106,7 @@ static void radeonBeginQuery(struct gl_context *ctx, struct gl_query_object *q) radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_query_object *query = (struct radeon_query_object *)q; - radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __func__, q->Id); assert(radeon->query.current == NULL); @@ -135,7 +135,7 @@ void radeonEmitQueryEnd(struct gl_context *ctx) if (query->emitted_begin == GL_FALSE) return; - radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __func__, query->Base.Id, query->bo, query->curr_offset); radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, query->bo, @@ -148,7 +148,7 @@ static void radeonEndQuery(struct gl_context *ctx, struct gl_query_object *q) { radeonContextPtr radeon = RADEON_CONTEXT(ctx); - 
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id); + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __func__, q->Id); if (radeon->dma.flush) radeon->dma.flush(&radeon->glCtx); @@ -159,7 +159,7 @@ static void radeonEndQuery(struct gl_context *ctx, struct gl_query_object *q) static void radeonCheckQuery(struct gl_context *ctx, struct gl_query_object *q) { - radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id); + radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __func__, q->Id); \ #ifdef DRM_RADEON_GEM_BUSY radeonContextPtr radeon = RADEON_CONTEXT(ctx); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 044e212..45d9b2b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -571,7 +571,7 @@ radeonCreateScreen2(__DRIscreen *sPriv) /* Allocate the private area */ screen = calloc(1, sizeof(*screen)); if ( !screen ) { - fprintf(stderr, "%s: Could not allocate memory for screen structure", __FUNCTION__); + fprintf(stderr, "%s: Could not allocate memory for screen structure", __func__); fprintf(stderr, "leaving here\n"); return NULL; } diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index 66a50a9..c45bb51 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -683,7 +683,7 @@ static void update_light_colors( struct gl_context *ctx, GLuint p ) { struct gl_light *l = &ctx->Light.Light[p]; -/* fprintf(stderr, "%s\n", __FUNCTION__); */ +/* fprintf(stderr, "%s\n", __func__); */ if (l->Enabled) { r100ContextPtr rmesa = R100_CONTEXT(ctx); @@ -799,7 +799,7 @@ void radeonUpdateMaterial( struct gl_context *ctx ) mask &= ~ctx->Light._ColorMaterialBitmask; if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s\n", __func__); if (mask & 
MAT_BIT_FRONT_EMISSION) { @@ -1451,7 +1451,7 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state ) GLuint p, flag; if ( RADEON_DEBUG & RADEON_STATE ) - fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__, + fprintf( stderr, "%s( %s = %s )\n", __func__, _mesa_lookup_enum_by_nr( cap ), state ? "GL_TRUE" : "GL_FALSE" ); @@ -1745,7 +1745,7 @@ static void radeonLightingSpaceChange( struct gl_context *ctx ) RADEON_STATECHANGE( rmesa, tcl ); if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + fprintf(stderr, "%s %d BEFORE %x\n", __func__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); if (ctx->_NeedEyeCoords) @@ -1760,7 +1760,7 @@ static void radeonLightingSpaceChange( struct gl_context *ctx ) } if (RADEON_DEBUG & RADEON_STATE) - fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, + fprintf(stderr, "%s %d AFTER %x\n", __func__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); } @@ -2081,7 +2081,7 @@ static void radeonWrapRunPipeline( struct gl_context *ctx ) GLboolean has_material; if (0) - fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); + fprintf(stderr, "%s, newstate: %x\n", __func__, rmesa->radeon.NewGLState); /* Validate state: */ diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h index 1624576..2ff5e42 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.h +++ b/src/mesa/drivers/dri/radeon/radeon_state.h @@ -56,7 +56,7 @@ extern GLboolean radeonValidateState( struct gl_context *ctx ); extern void radeonFallback( struct gl_context *ctx, GLuint bit, GLboolean mode ); #define FALLBACK( rmesa, bit, mode ) do { \ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ - __FUNCTION__, bit, mode ); \ + __func__, bit, mode ); \ radeonFallback( &rmesa->radeon.glCtx, bit, mode ); \ } while (0) diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c 
b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 9848797..8a1fbab 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -227,7 +227,7 @@ static void radeonSetVertexFormat( struct gl_context *ctx ) rmesa->radeon.swtcl.vertex_size /= 4; rmesa->radeon.tnl_index_bitset = index_bitset; radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, - "%s: vertex_size= %d floats\n", __FUNCTION__, rmesa->radeon.swtcl.vertex_size); + "%s: vertex_size= %d floats\n", __func__, rmesa->radeon.swtcl.vertex_size); } } @@ -243,7 +243,7 @@ static void radeon_predict_emit_size( r100ContextPtr rmesa ) if (rcommonEnsureCmdBufSpace(&rmesa->radeon, state_size + (scissor_size + prims_size + vertex_size), - __FUNCTION__)) + __func__)) rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon ); else rmesa->radeon.swtcl.emit_prediction = state_size; diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 271d5f9..1d07459 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -339,7 +339,7 @@ static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs ) space_required += SCISSOR_BUFSZ; } /* flush the buffer in case we need more than is left. 
*/ - if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__)) + if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __func__)) return space_required + radeonCountStateEmitSize( &rmesa->radeon ); else return space_required + state_size; @@ -508,7 +508,7 @@ static void transition_to_hwtnl( struct gl_context *ctx ) // if (rmesa->swtcl.indexed_verts.buf) // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - // __FUNCTION__ ); + // __func__ ); if (RADEON_DEBUG & RADEON_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 6e8ab56..353fdb0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -95,7 +95,7 @@ static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap ) is_clamp_to_border = GL_TRUE; break; default: - _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); + _mesa_problem(NULL, "bad S wrap mode in %s", __func__); } if (t->base.Target != GL_TEXTURE_1D) { @@ -129,7 +129,7 @@ static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap ) is_clamp_to_border = GL_TRUE; break; default: - _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__); + _mesa_problem(NULL, "bad T wrap mode in %s", __func__); } } @@ -263,7 +263,7 @@ static void radeonTexEnv( struct gl_context *ctx, GLenum target, if ( RADEON_DEBUG & RADEON_STATE ) { fprintf( stderr, "%s( %s )\n", - __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) ); + __func__, _mesa_lookup_enum_by_nr( pname ) ); } switch ( pname ) { @@ -334,7 +334,7 @@ static void radeonTexParameter( struct gl_context *ctx, { radeonTexObj* t = radeon_tex_obj(texObj); - radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__, + radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __func__, _mesa_lookup_enum_by_nr( pname ) ); switch ( pname ) { @@ -358,7 +358,7 @@ static void 
radeonDeleteTexture( struct gl_context *ctx, int i; radeon_print(RADEON_TEXTURE, RADEON_NORMAL, - "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, + "%s( %p (target = %s) )\n", __func__, (void *)texObj, _mesa_lookup_enum_by_nr( texObj->Target ) ); if ( rmesa ) { diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index e527304..93313c2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -86,7 +86,7 @@ do_copy_texsubimage(struct gl_context *ctx, if (0) { fprintf(stderr, "%s: copying to face %d, level %d\n", - __FUNCTION__, face, level); + __func__, face, level); fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset); fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n", x, y, rrb->base.Base.Width, rrb->base.Base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp); diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 0439f6d..45667ef 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -258,7 +258,7 @@ static GLboolean radeonUpdateTextureEnv( struct gl_context *ctx, int unit ) if ( RADEON_DEBUG & RADEON_TEXTURE ) { - fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit ); + fprintf( stderr, "%s( %p, %d )\n", __func__, (void *)ctx, unit ); } /* Set the texture environment state. Isn't this nice and clean? 
@@ -976,7 +976,7 @@ static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int t->pp_txfilter |= table[ firstImage->TexFormat ].filter; } else { _mesa_problem(NULL, "unexpected texture format in %s", - __FUNCTION__); + __func__); return GL_FALSE; } } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 5bf6541..edfd48b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -492,7 +492,7 @@ static void teximage_assign_miptree(radeonContextPtr rmesa, radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: texObj %p, texImage %p, " "texObj miptree doesn't match, allocated new miptree %p\n", - __FUNCTION__, texObj, texImage, t->mt); + __func__, texObj, texImage, t->mt); } /* Miptree alocation may have failed, From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Ensure delta_x/ y are even-aligned registers on Gen6. Message-ID: <20150421170407.0E041761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3b4abdae041802183fa7d3792a21bf9ca10df96e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b4abdae041802183fa7d3792a21bf9ca10df96e Author: Matt Turner Date: Thu Apr 2 16:57:10 2015 -0700 i965/fs: Ensure delta_x/y are even-aligned registers on Gen6. The BSpec says this applies to Gen6 as well. 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index b06a947..a732a89 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -428,7 +428,7 @@ fs_generator::generate_linterp(fs_inst *inst, if (brw->has_pln && delta_y.nr == delta_x.nr + 1 && - (brw->gen >= 6 || (delta_x.nr & 1) == 0)) { + (brw->gen >= 7 || (delta_x.nr & 1) == 0)) { brw_PLN(p, dst, interp, delta_x); } else { brw_LINE(p, brw_null_reg(), interp, delta_x); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2dfafdf..2a4054a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -245,9 +245,9 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) assert(reg == ra_reg_count); /* Add a special class for aligned pairs, which we'll put delta_x/y - * in on gen5 so that we can do PLN. + * in on Gen <= 6 so that we can do PLN. */ - if (devinfo->has_pln && reg_width == 1 && devinfo->gen < 6) { + if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) { aligned_pairs_class = ra_alloc_reg_class(regs); for (int i = 0; i < pairs_reg_count; i++) { From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965: Replace guess_execution_size with something simpler. 
Message-ID: <20150421170407.1A968761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dd5c8250537640f92dbc1ee63d516c6e3e2aaf77 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dd5c8250537640f92dbc1ee63d516c6e3e2aaf77 Author: Matt Turner Date: Tue Apr 14 12:40:34 2015 -0700 i965: Replace guess_execution_size with something simpler. guess_execution_size() does two things: 1. Cope with small destination registers. 2. Cope with SIMD8 vs SIMD16 mode. This patch replaces the first with a simple if block in brw_set_dest: if the destination register width is less than 8, you probably want the execution size to match. (I didn't put this in the 3src block because it doesn't seem to matter.) Since only the FS compiler cares about SIMD16 mode, it's easy to just set the default execution size there. This pattern has already been proven in the Gen8+ generator, but we didn't port it back to the existing generator when we combined the two. This is based on a patch from Ken from about a year ago. I've rebased it and fixed a few bugs. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_eu.c | 7 ++++++ src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 32 +++++++----------------- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 22 +++++++++++++--- 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 146202b..c21d14d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -110,6 +110,12 @@ brw_swap_cmod(uint32_t cmod) } } +void +brw_set_default_exec_size(struct brw_compile *p, unsigned value) +{ + brw_inst_set_exec_size(p->brw, p->current, value); +} + void brw_set_default_predicate_control( struct brw_compile *p, unsigned pc ) { brw_inst_set_pred_control(p->brw, p->current, pc); @@ -228,6 +234,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) /* Some defaults?
*/ + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ brw_set_default_saturate(p, 0); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index f8fd155..31c1492 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -99,6 +99,7 @@ struct brw_compile { void brw_pop_insn_state( struct brw_compile *p ); void brw_push_insn_state( struct brw_compile *p ); +void brw_set_default_exec_size(struct brw_compile *p, unsigned value); void brw_set_default_mask_control( struct brw_compile *p, unsigned value ); void brw_set_default_saturate( struct brw_compile *p, bool enable ); void brw_set_default_access_mode( struct brw_compile *p, unsigned access_mode ); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 1fe9e7b..706b66b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -36,25 +36,6 @@ #include "util/ralloc.h" -/*********************************************************************** - * Internal helper for constructing instructions - */ - -static void guess_execution_size(struct brw_compile *p, - brw_inst *insn, - struct brw_reg reg) -{ - const struct brw_context *brw = p->brw; - - if (reg.width == BRW_WIDTH_8 && p->compressed) { - brw_inst_set_exec_size(brw, insn, BRW_EXECUTE_16); - } else { - /* Register width definitions are compatible with BRW_EXECUTE_* enums. */ - brw_inst_set_exec_size(brw, insn, reg.width); - } -} - - /** * Prior to Sandybridge, the SEND instruction accepted non-MRF source * registers, implicitly moving the operand to a message register. 
@@ -76,6 +57,7 @@ gen6_resolve_implied_move(struct brw_compile *p, if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), @@ -215,10 +197,12 @@ brw_set_dest(struct brw_compile *p, brw_inst *inst, struct brw_reg dest) } } - /* NEW: Set the execution size based on dest.width and - * inst->compression_control: + /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8) + * or 16 (SIMD16), as that's normally correct. However, when dealing with + * small registers, we automatically reduce it to match the register size. */ - guess_execution_size(p, inst, dest); + if (dest.width < BRW_EXECUTE_8) + brw_inst_set_exec_size(brw, inst, dest.width); } extern int reg_type_size[]; @@ -874,7 +858,6 @@ brw_alu3(struct brw_compile *p, unsigned opcode, struct brw_reg dest, brw_inst_set_3src_dst_reg_nr(brw, inst, dest.nr); brw_inst_set_3src_dst_subreg_nr(brw, inst, dest.subnr / 16); brw_inst_set_3src_dst_writemask(brw, inst, dest.dw1.bits.writemask); - guess_execution_size(p, inst, dest); assert(src0.file == BRW_GENERAL_REGISTER_FILE); assert(src0.address_mode == BRW_ADDRESS_DIRECT); @@ -2015,6 +1998,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, */ { brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); @@ -2135,6 +2119,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, { brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -2228,6 +2213,7 @@ void brw_oword_block_read(struct brw_compile *p, mrf = retype(mrf, 
BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index a732a89..ba2a12f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -207,6 +207,7 @@ fs_generator::fire_fb_write(fs_inst *inst, if (brw->gen < 6) { brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); @@ -287,11 +288,13 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } if (brw->gen >= 6) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_16); brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); brw_MOV(p, retype(payload, BRW_REGISTER_TYPE_UD), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + brw_pop_insn_state(p); if (inst->target > 0 && key->replicate_alpha) { /* Set "Source0 Alpha Present to RenderTarget" bit in message @@ -448,12 +451,14 @@ fs_generator::generate_math_gen6(fs_inst *inst, if (dispatch_width == 8) { gen6_math(p, dst, op, src0, src1); } else if (dispatch_width == 16) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); gen6_math(p, firsthalf(dst), op, firsthalf(src0), firsthalf(src1)); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); gen6_math(p, sechalf(dst), op, sechalf(src0), binop ? 
sechalf(src1) : brw_null_reg()); - brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } } @@ -472,6 +477,7 @@ fs_generator::generate_math_gen4(fs_inst *inst, inst->base_mrf, src, BRW_MATH_PRECISION_FULL); } else if (dispatch_width == 16) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); gen4_math(p, firsthalf(dst), op, @@ -718,6 +724,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src } brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); /* Explicitly set up the message header by copying g0 to the MRF. */ @@ -1333,6 +1340,7 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst, assert(value.file == BRW_IMMEDIATE_VALUE); brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); @@ -1358,8 +1366,6 @@ fs_generator::generate_set_omask(fs_inst *inst, assert(stride_8_8_1 || stride_0_1_0); assert(dst.type == BRW_REGISTER_TYPE_UW); - if (dispatch_width == 16) - dst = vec16(dst); brw_push_insn_state(p); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -1387,6 +1393,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst, src0.type == BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW); @@ -1606,6 +1613,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_default_saturate(p, inst->saturate); 
brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); + brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); switch (inst->exec_size) { case 1: @@ -1653,6 +1661,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) assert(brw->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_inst *f = brw_MAD(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -1674,6 +1683,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) assert(brw->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_inst *f = brw_LRP(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -1747,6 +1757,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) */ if (dispatch_width == 16 && brw->gen == 7 && !brw->is_haswell) { if (dst.file == BRW_GENERAL_REGISTER_FILE) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_CMP(p, firsthalf(dst), inst->conditional_mod, firsthalf(src[0]), firsthalf(src[1])); @@ -1811,6 +1822,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) assert(brw->gen >= 7); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFE(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), 
firsthalf(src[2])); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -1830,6 +1842,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) * "Force BFI instructions to be executed always in SIMD8." */ if (dispatch_width == 16 && brw->is_haswell) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1])); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -1852,6 +1865,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) */ if (dispatch_width == 16 && (brw->is_haswell || !brw_supports_simd16_3src(brw))) { + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965: Make type_sz() return unsigned. Message-ID: <20150421170407.245A3761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 45a13486126fdf0cbb68b7a888cff642c32c1d12 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=45a13486126fdf0cbb68b7a888cff642c32c1d12 Author: Matt Turner Date: Sat Apr 11 14:49:50 2015 -0700 i965: Make type_sz() return unsigned. Avoids annoying warnings when comparing with sizeof(...). 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 924b059..3a50e86 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -265,7 +265,7 @@ struct brw_indirect { }; -static inline int +static inline unsigned type_sz(unsigned type) { switch(type) { From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Add LINTERP's src0 to fs_inst::regs_read(). Message-ID: <20150421170407.47B71761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a1dd2f0bb6f9bf61d4a40d033740140b86c060e0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a1dd2f0bb6f9bf61d4a40d033740140b86c060e0 Author: Matt Turner Date: Sat Apr 11 15:00:19 2015 -0700 i965/fs: Add LINTERP's src0 to fs_inst::regs_read(). LINTERP's src0 is PLN's src1, and PLN's src1 reads exec_size / 4 registers. Having that information lets us drop the delta_x/y special case code in split_virtual_grfs(). 
Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7cc88ea..5cdc19c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -953,6 +953,8 @@ fs_inst::regs_read(int arg) const return mlen; } else if (opcode == FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET && arg == 0) { return mlen; + } else if (opcode == FS_OPCODE_LINTERP && arg == 0) { + return exec_size / 4; } switch (src[arg].file) { @@ -1984,17 +1986,6 @@ fs_visitor::split_virtual_grfs() } } - if (brw->has_pln && - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF) { - /* PLN opcodes rely on the delta_xy being contiguous. We only have to - * check this for BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC, because prior to - * Gen6, that was the only supported interpolation mode, and since Gen6, - * delta_x and delta_y are in fixed hardware registers. - */ - int vgrf = this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg; - split_points[vgrf_to_reg[vgrf] + 1] = false; - } - foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == GRF) { int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset; From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Manually set source regioning on PLN instructions. Message-ID: <20150421170407.5514F761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b14313e45295d91b5737775ec788c76d8f0c2f93 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b14313e45295d91b5737775ec788c76d8f0c2f93 Author: Matt Turner Date: Mon Apr 6 21:46:54 2015 -0700 i965/fs: Manually set source regioning on PLN instructions. Like LINE (commit 92346db0), src0 must have a scalar region. 
Setting src1's region to <8,8,1> lets us pass a properly sized combined delta_xy argument in a few commits without getting a bogus <16,16,1> region. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 44b9ffb..b9faf36 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1008,7 +1008,6 @@ ALU2(DP4) ALU2(DPH) ALU2(DP3) ALU2(DP2) -ALU2(PLN) ALU3F(MAD) ALU3F(LRP) ALU1(BFREV) @@ -1113,6 +1112,19 @@ brw_LINE(struct brw_compile *p, struct brw_reg dest, } brw_inst * +brw_PLN(struct brw_compile *p, struct brw_reg dest, + struct brw_reg src0, struct brw_reg src1) +{ + src0.vstride = BRW_VERTICAL_STRIDE_0; + src0.width = BRW_WIDTH_1; + src0.hstride = BRW_HORIZONTAL_STRIDE_0; + src1.vstride = BRW_VERTICAL_STRIDE_8; + src1.width = BRW_WIDTH_8; + src1.hstride = BRW_HORIZONTAL_STRIDE_1; + return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1); +} + +brw_inst * brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) { const struct brw_context *brw = p->brw; From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Emit ADDs for gl_FragCoord, not virtual opcodes. Message-ID: <20150421170407.65305761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fde3100fe65a175f034c77e7989601839c9983bb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fde3100fe65a175f034c77e7989601839c9983bb Author: Matt Turner Date: Thu Apr 2 16:15:53 2015 -0700 i965/fs: Emit ADDs for gl_FragCoord, not virtual opcodes. These were used only on Gen4 and 5. emit_interpolation_setup_gen6() emits ADDs directly. The virtual opcodes weren't providing anything useful. 
I'm going to repurpose these opcodes, so deleting and readding them makes it simpler to see what's going on. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_defines.h | 2 -- src/mesa/drivers/dri/i965/brw_fs.h | 1 - src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 40 ------------------------ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 11 +++++-- src/mesa/drivers/dri/i965/brw_shader.cpp | 5 --- 5 files changed, 8 insertions(+), 51 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index a97a944..5962b00 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -923,8 +923,6 @@ enum opcode { */ FS_OPCODE_DDY_COARSE, FS_OPCODE_DDY_FINE, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 32063f0..d625d91 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -557,7 +557,6 @@ private: void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_blorp_fb_write(fs_inst *inst); - void generate_pixel_xy(struct brw_reg dst, bool is_x); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 397d825..353f35a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -387,40 +387,6 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) inst->header_present); } -/* Computes the integer pixel x,y values from the origin. 
- * - * This is the basis of gl_FragCoord computation, but is also used - * pre-gen6 for computing the deltas from v0 for computing - * interpolation. - */ -void -fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x) -{ - struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - struct brw_reg src; - struct brw_reg deltas; - - if (is_x) { - src = stride(suboffset(g1_uw, 4), 2, 4, 0); - deltas = brw_imm_v(0x10101010); - } else { - src = stride(suboffset(g1_uw, 5), 2, 4, 0); - deltas = brw_imm_v(0x11001100); - } - - if (dispatch_width == 16) { - dst = vec16(dst); - } - - /* We do this SIMD8 or SIMD16, but since the destination is UW we - * don't do compression in the SIMD16 case. - */ - brw_push_insn_state(p); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_ADD(p, dst, src, deltas); - brw_pop_insn_state(p); -} - void fs_generator::generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) @@ -1949,12 +1915,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_math_gen4(inst, dst, src[0]); } break; - case FS_OPCODE_PIXEL_X: - generate_pixel_xy(dst, true); - break; - case FS_OPCODE_PIXEL_Y: - generate_pixel_xy(dst, false); - break; case FS_OPCODE_CINTERP: brw_MOV(p, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 4e99366..98c6988 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3428,14 +3428,19 @@ fs_visitor::interp_reg(int location, int channel) void fs_visitor::emit_interpolation_setup_gen4() { + struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + this->current_annotation = "compute pixel centers"; this->pixel_x = vgrf(glsl_type::uint_type); this->pixel_y = vgrf(glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; this->pixel_y.type = BRW_REGISTER_TYPE_UW; - - emit(FS_OPCODE_PIXEL_X, this->pixel_x); 
- emit(FS_OPCODE_PIXEL_Y, this->pixel_y); + emit(ADD(this->pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010)))); + emit(ADD(this->pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100)))); this->current_annotation = "compute pixel deltas from v0"; if (brw->has_pln) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0d6ac0c..d0d5cf9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -512,11 +512,6 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_DDY_FINE: return "ddy_fine"; - case FS_OPCODE_PIXEL_X: - return "pixel_x"; - case FS_OPCODE_PIXEL_Y: - return "pixel_y"; - case FS_OPCODE_CINTERP: return "cinterp"; case FS_OPCODE_LINTERP: From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Calculate delta_x and delta_y together. Message-ID: <20150421170407.781A5761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5af0604d528733af9113a6f8711c39796ce0ae40 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5af0604d528733af9113a6f8711c39796ce0ae40 Author: Matt Turner Date: Mon Apr 6 17:44:40 2015 -0700 i965/fs: Calculate delta_x and delta_y together. This lets SIMD16 programs on G45 and Gen5 use the PLN instruction. 
On Ironlake: total instructions in shared programs: 5634757 -> 5518055 (-2.07%) instructions in affected programs: 1745837 -> 1629135 (-6.68%) helped: 11439 HURT: 4 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 46 ++++++------------- src/mesa/drivers/dri/i965/brw_fs.h | 3 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 25 ++++++++-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++--- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 ++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 51 ++++++++++----------- src/mesa/drivers/dri/i965/brw_reg.h | 7 +++ 7 files changed, 79 insertions(+), 74 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5cdc19c..cf1c385 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1265,8 +1265,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); } else { emit(FS_OPCODE_LINTERP, wpos, - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], interp_reg(VARYING_SLOT_POS, 2)); } wpos = offset(wpos, 1); @@ -1308,8 +1307,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp, barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; } return emit(FS_OPCODE_LINTERP, attr, - this->delta_x[barycoord_mode], - this->delta_y[barycoord_mode], interp); + this->delta_xy[barycoord_mode], interp); } void @@ -1859,8 +1857,8 @@ fs_visitor::assign_urb_setup() */ foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == FS_OPCODE_LINTERP) { - assert(inst->src[2].file == HW_REG); - inst->src[2].fixed_hw_reg.nr += urb_start; + assert(inst->src[1].file == HW_REG); + inst->src[1].fixed_hw_reg.nr += urb_start; } if (inst->opcode == FS_OPCODE_CINTERP) { @@ -2114,25 +2112,16 @@ fs_visitor::compact_virtual_grfs() } 
} - /* Patch all the references to delta_x/delta_y, since they're used in - * register allocation. If they're unused, switch them to BAD_FILE so - * we don't think some random VGRF is delta_x/delta_y. + /* Patch all the references to delta_xy, since they're used in register + * allocation. If they're unused, switch them to BAD_FILE so we don't + * think some random VGRF is delta_xy. */ - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) { - if (delta_x[i].file == GRF) { - if (remap_table[delta_x[i].reg] != -1) { - delta_x[i].reg = remap_table[delta_x[i].reg]; + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { + if (delta_xy[i].file == GRF) { + if (remap_table[delta_xy[i].reg] != -1) { + delta_xy[i].reg = remap_table[delta_xy[i].reg]; } else { - delta_x[i].file = BAD_FILE; - } - } - } - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) { - if (delta_y[i].file == GRF) { - if (remap_table[delta_y[i].reg] != -1) { - delta_y[i].reg = remap_table[delta_y[i].reg]; - } else { - delta_y[i].file = BAD_FILE; + delta_xy[i].file = BAD_FILE; } } } @@ -2685,14 +2674,9 @@ fs_visitor::opt_register_renaming() if (progress) { invalidate_live_intervals(); - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) { - if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) { - delta_x[i].reg = remap[delta_x[i].reg]; - } - } - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) { - if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) { - delta_y[i].reg = remap[delta_y[i].reg]; + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { + if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) { + delta_xy[i].reg = remap[delta_xy[i].reg]; } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d625d91..24ca43c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -514,8 +514,7 @@ public: fs_reg pixel_y; fs_reg wpos_w; fs_reg pixel_w; - fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; - fs_reg 
delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; + fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; fs_reg shader_start_time; fs_reg userplane[MAX_CLIP_PLANES]; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 353f35a..4955640 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -391,12 +391,31 @@ void fs_generator::generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src) { + /* PLN reads: + * / in SIMD16 \ + * ----------------------------------- + * | src1+0 | src1+1 | src1+2 | src1+3 | + * |-----------------------------------| + * |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)| + * ----------------------------------- + * + * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys: + * + * ----------------------------------- + * | src1+0 | src1+1 | src1+2 | src1+3 | + * |-----------------------------------| + * |(x0, x1)|(y0, y1)| | | in SIMD8 + * |-----------------------------------| + * |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16 + * ----------------------------------- + * + * See also: emit_interpolation_setup_gen4(). 
+ */ struct brw_reg delta_x = src[0]; - struct brw_reg delta_y = src[1]; - struct brw_reg interp = src[2]; + struct brw_reg delta_y = offset(src[0], dispatch_width / 8); + struct brw_reg interp = src[1]; if (brw->has_pln && - delta_y.nr == delta_x.nr + 1 && (brw->gen >= 7 || (delta_x.nr & 1) == 0)) { brw_PLN(p, dst, interp, delta_x); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3972581..e1687ed 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1482,8 +1482,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ no16("interpolate_at_* not yet supported in SIMD16 mode."); - fs_reg dst_x = vgrf(2); - fs_reg dst_y = offset(dst_x, 1); + fs_reg dst_xy = vgrf(2); /* For most messages, we need one reg of ignored data; the hardware * requires mlen==1 even when there is no payload. in the per-slot @@ -1495,7 +1494,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_interp_var_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); + inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); break; case nir_intrinsic_interp_var_at_sample: { @@ -1503,7 +1502,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); assert(const_sample); unsigned msg_data = const_sample ? 
const_sample->i[0] << 4 : 0; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, + inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data)); break; } @@ -1515,7 +1514,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf; unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, + inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, fs_reg(off_x | (off_y << 4))); } else { src = vgrf(glsl_type::ivec2_type); @@ -1548,7 +1547,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } mlen = 2; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, + inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, fs_reg(0u)); } break; @@ -1567,7 +1566,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); src.type = dest.type; - emit(FS_OPCODE_LINTERP, dest, dst_x, dst_y, src); + emit(FS_OPCODE_LINTERP, dest, dst_xy, src); dest = offset(dest, 1); } break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2a4054a..47f5a42 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -244,7 +244,7 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) } assert(reg == ra_reg_count); - /* Add a special class for aligned pairs, which we'll put delta_x/y + /* Add a special class for aligned pairs, which we'll put delta_xy * in on Gen <= 6 so that we can do PLN. */ if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) { @@ -558,14 +558,14 @@ fs_visitor::assign_regs(bool allow_spilling) * second operand of a PLN instruction needs to be an * even-numbered register, so we have a special register class * wm_aligned_pairs_class to handle this case. 
pre-GEN6 always - * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the + * uses this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the * second operand of a PLN instruction (since it doesn't support * any other interpolation modes). So all we need to do is find * that register and set it to the appropriate class. */ if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 && - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF && - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF && + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { c = screen->wm_reg_sets[rsi].aligned_pairs_class; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 98c6988..7fdd4e5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -593,8 +593,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) /* 1. collect interpolation factors */ - fs_reg dst_x = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1)); - fs_reg dst_y = offset(dst_x, 1); + fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1)); /* for most messages, we need one reg of ignored data; the hardware requires mlen==1 * even when there is no payload. 
in the per-slot offset case, we'll replace this with @@ -606,7 +605,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) switch (ir->operation) { case ir_unop_interpolate_at_centroid: - inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u)); + inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u)); break; case ir_binop_interpolate_at_sample: { @@ -614,7 +613,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) assert(sample_num || !"nonconstant sample number should have been lowered."); unsigned msg_data = sample_num->value.i[0] << 4; - inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data)); + inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data)); break; } @@ -623,7 +622,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) if (const_offset) { unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | (pack_pixel_offset(const_offset->value.f[1]) << 4); - inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src, + inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src, fs_reg(msg_data)); } else { /* pack the operands: hw wants offsets as 4 bit signed ints */ @@ -656,7 +655,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) } mlen = 2 * reg_width; - inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src, + inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src, fs_reg(0u)); } break; @@ -678,8 +677,7 @@ fs_visitor::emit_interpolate_expression(ir_expression *ir) for (int i = 0; i < ir->type->vector_elements; i++) { int ch = swiz ? 
((*(int *)&swiz->mask) >> 2*i) & 3 : i; - emit(FS_OPCODE_LINTERP, res, - dst_x, dst_y, + emit(FS_OPCODE_LINTERP, res, dst_xy, fs_reg(interp_reg(var->data.location, ch))); res = offset(res, 1); } @@ -3443,31 +3441,31 @@ fs_visitor::emit_interpolation_setup_gen4() fs_reg(brw_imm_v(0x11001100)))); this->current_annotation = "compute pixel deltas from v0"; - if (brw->has_pln) { - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = - vgrf(glsl_type::vec2_type); - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = - offset(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1); + + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = + vgrf(glsl_type::vec2_type); + const fs_reg &delta_xy = this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; + const fs_reg xstart(negate(brw_vec1_grf(1, 0))); + const fs_reg ystart(negate(brw_vec1_grf(1, 1))); + + if (brw->has_pln && dispatch_width == 16) { + emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart)); + emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart)); + emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart)) + ->force_sechalf = true; + emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart)) + ->force_sechalf = true; } else { - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = - vgrf(glsl_type::float_type); - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] = - vgrf(glsl_type::float_type); + emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart)); + emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart)); } - emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0))))); - emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))))); this->current_annotation = "compute pos.w and 1/pos.w"; /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. 
*/ this->wpos_w = vgrf(glsl_type::float_type); - emit(FS_OPCODE_LINTERP, wpos_w, - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], - interp_reg(VARYING_SLOT_POS, 3)); + emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = vgrf(glsl_type::float_type); emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); @@ -3509,8 +3507,7 @@ fs_visitor::emit_interpolation_setup_gen6() for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; - this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0)); - this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0)); + this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0)); } this->current_annotation = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 3a50e86..1b2bb10 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -704,6 +704,13 @@ brw_vec8_grf(unsigned nr, unsigned subnr) return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); } +/** Construct float[16] general-purpose register */ +static inline struct brw_reg +brw_vec16_grf(unsigned nr, unsigned subnr) +{ + return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Allow an execution size of 32. Message-ID: <20150421170407.30ECA761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b5a5b63548e9b27a3d0b8ad1b399006c71dcc3c4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5a5b63548e9b27a3d0b8ad1b399006c71dcc3c4 Author: Matt Turner Date: Fri Apr 3 17:44:06 2015 -0700 i965/fs: Allow an execution size of 32. 
In a few commits, we'll start emitting an add(32) instruction on some platforms. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 706b66b..44b9ffb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -213,7 +213,7 @@ validate_reg(const struct brw_context *brw, brw_inst *inst, struct brw_reg reg) const int hstride_for_reg[] = {0, 1, 2, 4}; const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32}; const int width_for_reg[] = {1, 2, 4, 8, 16}; - const int execsize_for_reg[] = {1, 2, 4, 8, 16}; + const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32}; int width, hstride, vstride, execsize; if (reg.file == BRW_IMMEDIATE_VALUE) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index ba2a12f..af08f9d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1630,6 +1630,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } break; case 16: + case 32: brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); break; default: From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Combine pixel center calculation into one inst. Message-ID: <20150421170407.886B5761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 529064f6a80d72294cc865a46304110e0401296d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=529064f6a80d72294cc865a46304110e0401296d Author: Matt Turner Date: Tue Apr 14 13:17:38 2015 -0700 i965/fs: Combine pixel center calculation into one inst. 
The X and Y values come interleaved in g1 (.4-.11 inclusive), so we can calculate them together with a single add(32) instruction on some platforms like Broadwell and newer or in SIMD8 elsewhere. Note that I also moved the PIXEL_X/PIXEL_Y virtual opcodes from before LINTERP to after it. That's because the writes_accumulator_implicitly() function in backend_instruction tests for <= LINTERP for determining whether the instruction indeed writes the accumulator implicitly. The old FS_OPCODE_PIXEL_X/Y emitted ADD instructions, which did, but the new opcodes just emit MOVs, which don't. It doesn't matter, since we don't use these opcodes on Gen4/5 anymore, but in the case that we do... On Broadwell: total instructions in shared programs: 7192355 -> 7186224 (-0.09%) instructions in affected programs: 1190700 -> 1184569 (-0.51%) helped: 6131 On Haswell: total instructions in shared programs: 6155979 -> 6152800 (-0.05%) instructions in affected programs: 652362 -> 649183 (-0.49%) helped: 3179 Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 71 +++++++++++++++++------- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 5962b00..bd3218a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -925,6 +925,8 @@ enum opcode { FS_OPCODE_DDY_FINE, FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 4955640..8d34d8a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1940,6 +1940,16 @@ 
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case FS_OPCODE_LINTERP: generate_linterp(inst, dst, src); break; + case FS_OPCODE_PIXEL_X: + assert(src[0].type == BRW_REGISTER_TYPE_UW); + src[0].subnr = 0 * type_sz(src[0].type); + brw_MOV(p, dst, stride(src[0], 8, 4, 1)); + break; + case FS_OPCODE_PIXEL_Y: + assert(src[0].type == BRW_REGISTER_TYPE_UW); + src[0].subnr = 4 * type_sz(src[0].type); + brw_MOV(p, dst, stride(src[0], 8, 4, 1)); + break; case SHADER_OPCODE_TEX: case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 7fdd4e5..c66ec3e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3478,27 +3478,58 @@ fs_visitor::emit_interpolation_setup_gen6() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - /* If the pixel centers end up used, the setup is the same as for gen4. */ this->current_annotation = "compute pixel centers"; - fs_reg int_pixel_x = vgrf(glsl_type::uint_type); - fs_reg int_pixel_y = vgrf(glsl_type::uint_type); - int_pixel_x.type = BRW_REGISTER_TYPE_UW; - int_pixel_y.type = BRW_REGISTER_TYPE_UW; - emit(ADD(int_pixel_x, - fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), - fs_reg(brw_imm_v(0x10101010)))); - emit(ADD(int_pixel_y, - fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), - fs_reg(brw_imm_v(0x11001100)))); - - /* As of gen6, we can no longer mix float and int sources. We have - * to turn the integer pixel centers into floats for their actual - * use. - */ - this->pixel_x = vgrf(glsl_type::float_type); - this->pixel_y = vgrf(glsl_type::float_type); - emit(MOV(this->pixel_x, int_pixel_x)); - emit(MOV(this->pixel_y, int_pixel_y)); + if (brw->gen >= 8 || dispatch_width == 8) { + /* The "Register Region Restrictions" page says for BDW (and newer, + * presumably): + * + * "When destination spans two registers, the source may be one or + * two registers. 
The destination elements must be evenly split + * between the two registers." + * + * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to + * compute our pixel centers. + */ + fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8), + BRW_REGISTER_TYPE_UW, dispatch_width * 2); + emit(ADD(int_pixel_xy, + fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), + fs_reg(brw_imm_v(0x11001010)))) + ->force_writemask_all = true; + + this->pixel_x = vgrf(glsl_type::float_type); + this->pixel_y = vgrf(glsl_type::float_type); + emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); + emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); + } else { + /* The "Register Region Restrictions" page says for SNB, IVB, HSW: + * + * "When destination spans two registers, the source MUST span two + * registers." + * + * Since the GRF source of the ADD will only read a single register, we + * must do two separate ADDs in SIMD16. + */ + fs_reg int_pixel_x = vgrf(glsl_type::uint_type); + fs_reg int_pixel_y = vgrf(glsl_type::uint_type); + int_pixel_x.type = BRW_REGISTER_TYPE_UW; + int_pixel_y.type = BRW_REGISTER_TYPE_UW; + emit(ADD(int_pixel_x, + fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), + fs_reg(brw_imm_v(0x10101010)))); + emit(ADD(int_pixel_y, + fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), + fs_reg(brw_imm_v(0x11001100)))); + + /* As of gen6, we can no longer mix float and int sources. We have + * to turn the integer pixel centers into floats for their actual + * use. 
+ */ + this->pixel_x = vgrf(glsl_type::float_type); + this->pixel_y = vgrf(glsl_type::float_type); + emit(MOV(this->pixel_x, int_pixel_x)); + emit(MOV(this->pixel_y, int_pixel_y)); + } this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); From mattst88 at kemper.freedesktop.org Tue Apr 21 17:04:07 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Tue, 21 Apr 2015 10:04:07 -0700 (PDT) Subject: Mesa (master): i965/fs: Set compression only if writing two registers. Message-ID: <20150421170407.3B820761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8bc49f9536058a8373d3057324e22fcbf79a7d89 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bc49f9536058a8373d3057324e22fcbf79a7d89 Author: Matt Turner Date: Sat Apr 11 14:51:13 2015 -0700 i965/fs: Set compression only if writing two registers. We don't want to set compression control on a SIMD16 instruction operating on words or smaller. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index af08f9d..397d825 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1631,7 +1631,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case 16: case 32: - brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); + if (type_sz(inst->dst.type) < sizeof(float)) + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + else + brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); break; default: unreachable("Invalid instruction width"); From tpalli at kemper.freedesktop.org Tue Apr 21 17:13:02 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Tue, 21 Apr 2015 10:13:02 -0700 (PDT) Subject: Mesa (master): glsl: correct 
indentation of comment, Trivial. Message-ID: <20150421171302.E64D1761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ed10f9cfad1a01226725d542155a240bcf70e9cb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ed10f9cfad1a01226725d542155a240bcf70e9cb Author: Tapani P?lli Date: Tue Apr 21 20:11:43 2015 +0300 glsl: correct indentation of comment, Trivial. Signed-off-by: Tapani P?lli --- src/glsl/linker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 9aa1389..21fde94 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2571,7 +2571,7 @@ add_interface_variables(struct gl_shader_program *shProg, var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && var->data.location != SYSTEM_VALUE_INSTANCE_ID) continue; - /* FALLTHROUGH */ + /* FALLTHROUGH */ case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) continue; From kwg at kemper.freedesktop.org Tue Apr 21 19:02:23 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 21 Apr 2015 12:02:23 -0700 (PDT) Subject: Mesa (master): nir: Fix per-component negation in prog_to_nir' s SWZ handling. Message-ID: <20150421190223.963C2761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 44461e70985278464f5c2ce89bda2336c7299b0b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=44461e70985278464f5c2ce89bda2336c7299b0b Author: Kenneth Graunke Date: Fri Apr 17 14:08:14 2015 -0700 nir: Fix per-component negation in prog_to_nir's SWZ handling. I missed the fact that the ARB_fragment_program SWZ instruction allows per-component negation. To fix this, move Abs/Negate handling into both the simple case and the SWZ case's per-component loop. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90000 Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/program/prog_to_nir.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index c738f50..ff3d9f3 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -222,12 +222,23 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) } nir_ssa_def *def; - if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle)) { + if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) && + (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) { + /* The simple non-SWZ case. */ for (int i = 0; i < 4; i++) src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i); def = nir_fmov_alu(b, src, 4); + + if (prog_src->Abs) + def = nir_fabs(b, def); + + if (prog_src->Negate) + def = nir_fneg(b, def); } else { + /* The SWZ instruction allows per-component zero/one swizzles, and also + * per-component negation. + */ nir_ssa_def *chans[4]; for (int i = 0; i < 4; i++) { int swizzle = GET_SWZ(prog_src->Swizzle, i); @@ -246,16 +257,16 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) chans[i] = &mov->dest.dest.ssa; } + + if (prog_src->Abs) + chans[i] = nir_fabs(b, chans[i]); + + if (prog_src->Negate & (1 << i)) + chans[i] = nir_fneg(b, chans[i]); } def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]); } - if (prog_src->Abs) - def = nir_fabs(b, def); - - if (prog_src->Negate) - def = nir_fneg(b, def); - return def; } From kwg at kemper.freedesktop.org Wed Apr 22 05:19:01 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Tue, 21 Apr 2015 22:19:01 -0700 (PDT) Subject: Mesa (master): drirc: Add "Second Life" quirk ( allow_glsl_extension_directive_midshader). 
Message-ID: <20150422051901.DC6EB761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 00bf7d2e9cd60dbd82d25b459c448e11c545a89a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=00bf7d2e9cd60dbd82d25b459c448e11c545a89a Author: Kenneth Graunke Date: Fri Apr 10 10:24:33 2015 -0700 drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader). Appears to fix shader compilation. Tested by starting the client, dragging the "quality and speed" slider back and forth, and watching the console output - instead of piles of "shader failed to compile", the CPU seems to be busy compiling shaders. I haven't actually tried to play. Signed-off-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=69226 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71591 Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/common/drirc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index cecd6a9..145e707 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -91,5 +91,9 @@ TODO: document the other workarounds. + + + From evelikov at kemper.freedesktop.org Wed Apr 22 14:55:35 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 22 Apr 2015 07:55:35 -0700 (PDT) Subject: Mesa (master): 25 new commits Message-ID: <20150422145535.37942761EB@kemper.freedesktop.org> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0b1823f5beeeb7a35e3addee071b31ac227734dc Author: Chih-Wei Huang Date: Sat Apr 4 05:01:26 2015 +0800 android: re-build all mesa binaries properly The clean steps ensure both 32-bit and 64-bit objects are cleaned. 
Signed-off-by: Chih-Wei Huang Reviewed-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=36e59215bab203236602e0f5734dffe8339c58ba Author: Emil Velikov Date: Wed Apr 22 15:38:57 2015 +0100 android: xmlpool: cleanup the generation rules - Do not attempt to create the save folder twice - both dir $@ and PRIVATE_LOCALEDIR point to the same place. - Use @ and $(hide), for mkdir and python, to avoid spamming the output. Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=98c8997fe5e4be1b158944d2bd58dba42afb4e1e Author: Chih-Wei Huang Date: Sat Apr 4 05:01:24 2015 +0800 android: xmlpool: Get rid of the last use of intermediates-dir-for v2 [Emil Velikov] - Keep the PRIVATE_LOCALEDIR variable. - Do not use $(@D) but the more widespead $(dir $@) Signed-off-by: Chih-Wei Huang Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b8d61b0ccf9c3228bef680b02c128734270a9d9 Author: Chih-Wei Huang Date: Sat Apr 4 05:01:23 2015 +0800 android: export the path of the generated headers The modules need the headers can get the path automatically. Signed-off-by: Chih-Wei Huang Reviewed-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b0e33c2256a68d87c4f67b6e25a86e594649ab93 Author: Chih-Wei Huang Date: Sat Apr 4 05:01:22 2015 +0800 android: fix the building rules for Android 5.0 Android 5.0 allows modules to generate source into $OUT/gen, which will then be copied into $OUT/obj and $OUT/obj_$(TARGET_2ND_ARCH) as necessary. Modules will need to change calls to local-intermediates-dir into local-generated-sources-dir. The patch changes local-intermediates-dir into local-generated-sources-dir. If the Android version is less than 5.0, fallback to local-intermediates-dir. The patch also fixes the 64-bit building issue of Android 5.0. v2 [Emil Velikov] - Keep the LOCAL_UNSTRIPPED_PATH variable. 
Signed-off-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=671a55084649f9e33018b1e5e1af4e28c639bf3b Author: Chih-Wei Huang Date: Sat Apr 4 05:01:21 2015 +0800 android: fix building issues of host binaries Define _GNU_SOURCE to enable features (__USE_XOPEN2K and __USE_UNIX98) required to build the host binaries. Signed-off-by: Chih-Wei Huang Reviewed-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=076edc6a036cb7601e010b7851eeaa7cfb1aa4e6 Author: Chih-Wei Huang Date: Thu Apr 2 14:16:33 2015 +0800 android: fix a building error of libmesa_program Add libmesa_glsl to LOCAL_STATIC_LIBRARIES to get its exported include path (for nir_opcodes.h). Signed-off-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8098bf8e7a975a18b271b44e760ff86feddb0472 Author: Emil Velikov Date: Wed Apr 22 15:53:11 2015 +0100 android: mesa: fold the ARCH_X86_HAVE_SSE4_1 conditionals Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=669cfc267a1102ff903b3e562f9aa45a410e0312 Author: Emil Velikov Date: Sat Mar 28 18:23:01 2015 +0000 android: mesa: fix the path of the SSE4_1 optimisations Commit dd6f641303c(mesa: Build with subdir-objects.) removed the SRCDIR variable, but forgot to update all references of it. v2: Fix path - must be relative to LOCAL_PATH. 
(Chih-Wei) Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64171c2d24196801cbb5b549fdc90743c42e3257 Author: Emil Velikov Date: Sat Mar 28 18:12:23 2015 +0000 android: build the Mesa IR -> NIR translator Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c734261dcf91fef6f3f55096835608a600bddec4 Author: Emil Velikov Date: Wed Apr 22 14:51:31 2015 +0100 android: nir: add build rules for nir_builder_opcodes.h Missed out with commit 2a135c470e3(nir: Add an ALU op builder kind of like ir_builder.h) Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=06619749a11651a50e353168c7c793082820585d Author: Mauro Rossi Date: Fri Mar 27 22:25:45 2015 +0000 android: add inital NIR build Required by the i965 driver. v2: - Split out the nir_builder_opcodes.h rules. - Do not unconditionally hide the python command - use $(hide) - Use LOCAL_EXPORT_C_INCLUDE_DIRS to manage includes for the generated sources. Cc: "10.5" [Emil Velikov: Split from a larger commit, v2] Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=618885f71fcacb3d68bf37fa23be36830d4178d2 Author: Emil Velikov Date: Fri Mar 27 21:39:15 2015 +0000 android: dri: link against libmesa_util The dri modules depend on symbols provided by it. Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0afbd2df0485cd480979d9f4cdae00262d1a3c62 Author: Emil Velikov Date: Sat Mar 28 01:11:54 2015 +0000 android: add $(mesa_top)/src/mesa/main to the includes list Required by the format_{un,}pack rework. 
Otherwise the build will fail to locate the respective headers - format_{un,}pack.h Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=39a175e0c792f569dfe73de1b4d01b0caea43a01 Author: Emil Velikov Date: Fri Mar 27 20:10:35 2015 +0000 android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines All of those are available on gcc 4.5 and later with the current android build using gcc 4.7. Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=94cab35ee93ed200013eddffa4c97828bd6aa82c Author: Emil Velikov Date: Sat Mar 28 01:10:52 2015 +0000 android: add gallium dirs to more places in the tree Similar to e8c5cbfd921(mesa: Add gallium include dirs to more parts of the tree.) Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8d90bfb724f89b04d703f869362cf2fc2a3d7567 Author: Emil Velikov Date: Fri Mar 27 18:57:46 2015 +0000 android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H Otherwise we'll fail to find the drm.h header. Cc: "10.4 10.5" Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d06791f6f9e8ab37109be52e63d247bbbcb42d4 Author: Emil Velikov Date: Fri Mar 27 18:36:10 2015 +0000 android: egl: add libsync_cflags to the build ... via local_shared_libraries. Otherwise the sync/sync.h header won't be found. Note: 10.5 and earlier will need similar change in st/egl. v2: Append the library to the local_shared_libraries list. 
(Chih-Wei) Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5f7081eb90bc5a25f0740314fa22e04d189238ca Author: Mauro Rossi Date: Fri Mar 27 18:20:53 2015 +0000 android: mesa: generate the format_{un,}pack.[ch] sources Missed out with commit e1fdcddafe9(mesa: Autogenerate format_unpack.c) v2: Conditionaly print the python commands - s/@/$(hide) / (Chih-Wei) Cc: "10.5" [Emil Velikov: Split our from a larger commit.] Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6fb801786604c270fae99c3d665dcebaa0bff3a6 Author: Emil Velikov Date: Fri Mar 27 16:13:50 2015 +0000 android: add $(mesa_top)/src include to the whole of mesa Many parts of mesa already have the include with others depending on it but it's missing. Add it once at the top makefile and be done with it. Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba3bc1eea2f41e06a6c4300d4bc2b3547dbcaaec Author: Emil Velikov Date: Fri Mar 27 14:18:24 2015 +0000 android: use := operator for assigning MESA_VERSION Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c2c5f74a2e8d94d22454bd19cf961a66bfc9103 Author: Chih-Wei Huang Date: Thu Jan 22 01:21:00 2015 +0800 util: android: optimize the rules to generate format_srgb.c Signed-off-by: Chih-Wei Huang Reviewed-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=63a76c15d8c259edc1a6bf8b696f109c86eace2e Author: Chih-Wei Huang Date: Thu Jan 22 01:17:42 2015 +0800 android: simplify the subdirs including rules Use the macro defined in the Android build system. Signed-off-by: Chih-Wei Huang Reviewed-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=86919352e3da1c80409fdcb67c36f29a9687b7a9 Author: Emil Velikov Date: Fri Mar 27 11:40:42 2015 +0000 android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS ... 
to manage the LIBDRM*_CFLAGS. The former is the recommended approach by the Android build system developers while the latter has been depreciated for quite some time. Cc: "10.4 10.5" Signed-off-by: Emil Velikov URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=413bc0a618d39873336cbfaf8fb5e43217f66ccf Author: Emil Velikov Date: Fri Mar 27 11:13:21 2015 +0000 ilo: remove unused include from Android.mk Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang From brianp at kemper.freedesktop.org Wed Apr 22 14:57:21 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 22 Apr 2015 07:57:21 -0700 (PDT) Subject: Mesa (master): main: silence missing return value warning in array_index_of_resource() Message-ID: <20150422145721.21F10761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b260d9d91f84f26676cf8baf42c6df88678be668 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b260d9d91f84f26676cf8baf42c6df88678be668 Author: Brian Paul Date: Thu Apr 16 15:27:20 2015 -0600 main: silence missing return value warning in array_index_of_resource() v2: return -1 instead of 0, per Emil Velikov. 
Reviewed-by: Anuj Phogat --- src/mesa/main/shader_query.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 336598d..bc6fec5 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -537,6 +537,7 @@ array_index_of_resource(struct gl_program_resource *res, return get_matching_index(RESOURCE_VAR(res), name); default: assert(!"support for resource type not implemented"); + return -1; } } From brianp at kemper.freedesktop.org Wed Apr 22 14:57:21 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 22 Apr 2015 07:57:21 -0700 (PDT) Subject: Mesa (master): mesa: add check for NV_texture_barrier in _mesa_TextureBarrierNV() Message-ID: <20150422145721.2EBED761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bd4dbdfa519362f74263a2c277a3b4be841acd06 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd4dbdfa519362f74263a2c277a3b4be841acd06 Author: Brian Paul Date: Thu Apr 16 15:27:41 2015 -0600 mesa: add check for NV_texture_barrier in _mesa_TextureBarrierNV() If an app called glTextureBarrierNV() without checking if the extension was available, we'd crash with some gallium drivers in st_TextureBarrier() because the pipe_context::texture_barrier() pointer was NULL. Generate GL_INVALID_OPERATION instead. 
Reviewed-by: Ilia Mirkin Reviewed-by: Anuj Phogat --- src/mesa/main/texturebarrier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/main/texturebarrier.c b/src/mesa/main/texturebarrier.c index 08ff561..d879eed 100644 --- a/src/mesa/main/texturebarrier.c +++ b/src/mesa/main/texturebarrier.c @@ -49,5 +49,11 @@ _mesa_TextureBarrierNV(void) { GET_CURRENT_CONTEXT(ctx); + if (!ctx->Extensions.NV_texture_barrier) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glTextureBarrier(not supported)"); + return; + } + ctx->Driver.TextureBarrier(ctx); } From brianp at kemper.freedesktop.org Wed Apr 22 14:57:21 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 22 Apr 2015 07:57:21 -0700 (PDT) Subject: Mesa (master): glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow Message-ID: <20150422145721.392BC761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 31667e6237d30188d0b29e17f5b9892f10c0d83a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=31667e6237d30188d0b29e17f5b9892f10c0d83a Author: Brian Paul Date: Thu Apr 16 15:29:18 2015 -0600 glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow This should be more efficient than the previous snprintf() solution. But more importantly, it avoids a buffer overflow bug that could result in crashes or unpredictable results when processing very large interface blocks. For the app in question, key->length = 103 for some interfaces. The check if size >= sizeof(hash_key) was insufficient to prevent overflows of the hash_key[128] array because it didn't account for the terminating zero. In this case, this caused the call to hash_table_string_hash() to return different results for identical inputs, and then shader linking failed. This new solution also takes all structure fields into account instead of just the first 15 when sizeof(pointer)==8. 
Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Ian Romanick --- src/glsl/glsl_types.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 4aa36a7..9c9b7ef 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -738,24 +738,27 @@ glsl_type::record_key_compare(const void *a, const void *b) } +/** + * Generate an integer hash value for a glsl_type structure type. + */ unsigned glsl_type::record_key_hash(const void *a) { const glsl_type *const key = (glsl_type *) a; - char hash_key[128]; - unsigned size = 0; - - size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length); + uintptr_t hash = key->length; + unsigned retval; for (unsigned i = 0; i < key->length; i++) { - if (size >= sizeof(hash_key)) - break; - - size += snprintf(& hash_key[size], sizeof(hash_key) - size, - "%p", (void *) key->fields.structure[i].type); + /* casting pointer to uintptr_t */ + hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type; } - return hash_table_string_hash(& hash_key); + if (sizeof(hash) == 8) + retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32); + else + retval = hash; + + return retval; } From brianp at kemper.freedesktop.org Wed Apr 22 14:57:21 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Wed, 22 Apr 2015 07:57:21 -0700 (PDT) Subject: Mesa (master): cso: minor comment fix Message-ID: <20150422145721.43A76761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 02e93be55e81d5ff257f4717eb3fdb5a8f79b544 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=02e93be55e81d5ff257f4717eb3fdb5a8f79b544 Author: Brian Paul Date: Tue Apr 21 08:10:22 2015 -0600 cso: minor comment fix --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 5d597ba..31ffa7d 100644 --- 
a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -326,7 +326,7 @@ void cso_destroy_context( struct cso_context *ctx ) ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL); } - /* free fragment sampler views */ + /* free sampler views for each shader stage */ for (shader = 0; shader < Elements(ctx->samplers); shader++) { struct sampler_info *info = &ctx->samplers[shader]; for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { From evelikov at kemper.freedesktop.org Wed Apr 22 14:59:40 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 22 Apr 2015 07:59:40 -0700 (PDT) Subject: Mesa (master): gallium/targets/d3dadapter9: drop the libdrm prefix for drm.h Message-ID: <20150422145940.F254D761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9450bd56bee3ab99c33be2e08317990fa7563520 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9450bd56bee3ab99c33be2e08317990fa7563520 Author: Emil Velikov Date: Fri Mar 20 21:20:10 2015 +0000 gallium/targets/d3dadapter9: drop the libdrm prefix for drm.h The path is provided by libdrm.pc and already used appropriately by the build system. 
Signed-off-by: Emil Velikov --- src/gallium/targets/d3dadapter9/drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c index ec594fd..6342ab8 100644 --- a/src/gallium/targets/d3dadapter9/drm.c +++ b/src/gallium/targets/d3dadapter9/drm.c @@ -39,7 +39,7 @@ #include "xmlconfig.h" #include "xmlpool.h" -#include <libdrm/drm.h> +#include <drm.h> #include #include #include From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno: update generated headers Message-ID: <20150422180156.6CFC0761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 21ceedfd8bd1b9e45ee12be24b328876bd7fa4eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21ceedfd8bd1b9e45ee12be24b328876bd7fa4eb Author: Rob Clark Date: Sat Apr 18 12:08:53 2015 -0400 freedreno: update generated headers Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 4 +- src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 4 +- src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 106 +++++++++++++++++++-- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 13 ++- src/gallium/drivers/freedreno/adreno_common.xml.h | 4 +- src/gallium/drivers/freedreno/adreno_pm4.xml.h | 12 ++- 6 files changed, 123 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index a315f5c..f4f6b94 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40) Copyright (C) 2013-2014 by the following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index 0cccff1..a3bc74e 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40) Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) diff --git 
a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index 0f69205..0e7d3cf 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40) Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) @@ -229,6 +229,12 @@ enum a4xx_depth_format { DEPTH4_24_8 = 2, }; +enum a4xx_tess_spacing { + EQUAL_SPACING = 0, + ODD_SPACING = 2, + EVEN_SPACING = 3, +}; + enum a4xx_tex_filter { A4XX_TEX_NEAREST = 0, A4XX_TEX_LINEAR = 1, @@ -1440,6 +1446,14 @@ static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; } +#define REG_A4XX_SP_HS_OBJ_START 0x0000230e + +#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f + +#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310 + +#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312 + #define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334 #define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 
0x01ff0000 #define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 @@ -1454,6 +1468,14 @@ static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; } +#define REG_A4XX_SP_DS_OBJ_START 0x00002335 + +#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336 + +#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337 + +#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339 + #define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b #define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 #define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 @@ -1740,6 +1762,44 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) #define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 +#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381 +#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff +#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00 +#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000 +#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK; +} +#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000 +#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24 +static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) +{ + return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK; +} + +#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384 + +#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 
0x00002387 + +#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a + +#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d + +#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0 + +#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1 + #define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6 #define REG_A4XX_GRAS_TSE_STATUS 0x00000c80 @@ -2057,7 +2117,12 @@ static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val) { return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK; } -#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORD 0x02000000 +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000 +#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24 +static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val) +{ + return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK; +} #define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2 #define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 @@ -2106,6 +2171,7 @@ static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) @@ -2132,6 +2198,7 @@ static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 static inline uint32_t 
A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) @@ -2158,6 +2225,7 @@ static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) @@ -2184,6 +2252,7 @@ static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) @@ -2210,6 +2279,7 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val) { return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK; } +#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000 #define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000 #define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17 static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val) @@ -2243,6 +2313,7 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) { return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK; } +#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000 #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 @@ -2257,14 +2328,39 @@ static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val) { return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) 
& A4XX_PC_GS_PARAM_MAX_VERTICES__MASK; } +#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800 +#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11 +static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val) +{ + return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK; +} #define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000 #define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23 static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) { return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_GS_PARAM_PRIMTYPE__MASK; } +#define A4XX_PC_GS_PARAM_LAYER 0x80000000 #define REG_A4XX_PC_HS_PARAM 0x000021e7 +#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f +#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0 +static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val) +{ + return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK; +} +#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000 +#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21 +static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val) +{ + return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK; +} +#define A4XX_PC_HS_PARAM_PRIMTYPE__MASK 0x01800000 +#define A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT 23 +static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_HS_PARAM_PRIMTYPE__MASK; +} #define REG_A4XX_VBIF_VERSION 0x00003000 @@ -2343,10 +2439,6 @@ static inline uint32_t A4XX_UNKNOWN_20F7(float val) #define REG_A4XX_UNKNOWN_22D7 0x000022d7 -#define REG_A4XX_UNKNOWN_2381 0x00002381 - -#define REG_A4XX_UNKNOWN_23A0 0x000023a0 - #define REG_A4XX_TEX_SAMP_0 0x00000000 #define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001 #define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index bae55dc..847a675 100644 --- 
a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -701,11 +701,14 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1); OUT_RING(ring, 0x00000000); - OUT_PKT0(ring, REG_A4XX_UNKNOWN_2381, 1); - OUT_RING(ring, 0x00000010); - - OUT_PKT0(ring, REG_A4XX_UNKNOWN_23A0, 1); - OUT_RING(ring, 0x00000010); + OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1); + OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) | + A4XX_TPL1_TP_TEX_COUNT_HS(0) | + A4XX_TPL1_TP_TEX_COUNT_DS(0) | + A4XX_TPL1_TP_TEX_COUNT_GS(0)); + + OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1); + OUT_RING(ring, 16); /* we don't use this yet.. probably best to disable.. */ OUT_PKT3(ring, CP_SET_DRAW_STATE, 2); diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index 174b495..b23aa83 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40) Copyright (C) 2013-2014 by the 
following authors: - Rob Clark (robclark) diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index a3d5fff..2b24c5b 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30) - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14748 bytes, from 2015-04-12 15:01:13) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 57486 bytes, from 2015-04-12 18:10:00) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40) Copyright (C) 2013-2015 by the following authors: - Rob Clark (robclark) @@ -80,6 +80,7 @@ enum pc_di_primtype { DI_PT_LINESTRIP_ADJ = 11, DI_PT_TRI_ADJ = 12, DI_PT_TRISTRIP_ADJ = 13, + DI_PT_PATCHES = 34, }; enum pc_di_src_sel { @@ -377,12 +378,19 @@ static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel va { return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK; } +#define CP_DRAW_INDX_OFFSET_0_TESSELLATE 0x00000100 #define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00 #define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10 static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val) { return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & 
CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK; } +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000 +#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20 +static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val) +{ + return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK; +} #define REG_CP_DRAW_INDX_OFFSET_1 0x00000001 #define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: formats updates/fixes Message-ID: <20150422180156.759F97626F@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 48a651e98ce764a9dae3d4dfd6e18044414be18b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=48a651e98ce764a9dae3d4dfd6e18044414be18b Author: Rob Clark Date: Wed Apr 22 13:00:03 2015 -0400 freedreno/a4xx: formats updates/fixes Update formats table with new formats that Ilia has figured out, and fix sampling from srgb texture and integer vbo's. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 2 + src/gallium/drivers/freedreno/a4xx/fd4_format.c | 111 +++++++++++++++------- src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 3 + 3 files changed, 84 insertions(+), 32 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 847a675..7a244d8 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -328,6 +328,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) || (instance_regid != regid(63, 0)); + bool isint = util_format_is_pure_integer(pfmt); uint32_t fs = util_format_get_blocksize(pfmt); uint32_t off = vb->buffer_offset + elem->src_offset; uint32_t size = fd_bo_size(rsc->bo) - off; @@ -350,6 +351,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) | A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) | A4XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(isint, A4XX_VFD_DECODE_INSTR_INT) | COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT)); total_in += vp->inputs[i].ncomp; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 9cff134..29abe0b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -78,7 +78,7 @@ struct fd4_format { static struct fd4_format formats[PIPE_FORMAT_COUNT] = { /* 8-bit */ - VT(R8_UNORM, 8_UNORM, NONE, WZYX), + VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), V_(R8_SNORM, 8_SNORM, NONE, WZYX), V_(R8_UINT, 8_UINT, NONE, WZYX), V_(R8_SINT, 8_SINT, NONE, WZYX), @@ -86,29 +86,38 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { V_(R8_SSCALED, 8_UINT, NONE, WZYX), _T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX), - _T(L8_UNORM, 8_UNORM, NONE, WZYX), + _T(L8_UNORM, 8_UNORM, R8_UNORM, 
WZYX), _T(I8_UNORM, 8_UNORM, NONE, WZYX), /* 16-bit */ V_(R16_UNORM, 16_UNORM, NONE, WZYX), V_(R16_SNORM, 16_SNORM, NONE, WZYX), - V_(R16_UINT, 16_UINT, NONE, WZYX), - V_(R16_SINT, 16_SINT, NONE, WZYX), + VT(R16_UINT, 16_UINT, R16_UINT, WZYX), + VT(R16_SINT, 16_SINT, R16_SINT, WZYX), V_(R16_USCALED, 16_UINT, NONE, WZYX), V_(R16_SSCALED, 16_UINT, NONE, WZYX), VT(R16_FLOAT, 16_FLOAT, NONE, WZYX), - VT(R8G8_UNORM, 8_8_UNORM, NONE, WZYX), - V_(R8G8_SNORM, 8_8_SNORM, NONE, WZYX), - V_(R8G8_UINT, 8_8_UINT, NONE, WZYX), - V_(R8G8_SINT, 8_8_SINT, NONE, WZYX), + _T(A16_UINT, 16_UINT, NONE, WZYX), + _T(A16_SINT, 16_SINT, NONE, WZYX), + _T(L16_UINT, 16_UINT, NONE, WZYX), + _T(L16_SINT, 16_SINT, NONE, WZYX), + _T(I16_UINT, 16_UINT, NONE, WZYX), + _T(I16_SINT, 16_SINT, NONE, WZYX), + + VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), + VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), + VT(R8G8_UINT, 8_8_UINT, NONE, WZYX), + VT(R8G8_SINT, 8_8_SINT, NONE, WZYX), V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX), V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX), - /*_T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ),*/ - _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, NONE, WXYZ), - _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, NONE, WXYZ), - _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, NONE, WXYZ), + _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), + _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + + _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), /* 24-bit */ V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX), @@ -119,48 +128,67 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX), /* 32-bit */ + VT(R32_UINT, 32_UINT, R32_UINT, WZYX), + VT(R32_SINT, 32_SINT, R32_SINT, WZYX), + V_(R32_USCALED, 32_UINT, NONE, WZYX), + V_(R32_SSCALED, 32_UINT, NONE, WZYX), VT(R32_FLOAT, 32_FLOAT, NONE, WZYX), V_(R32_FIXED, 32_FIXED, NONE, WZYX), + _T(A32_UINT, 32_UINT, NONE, WZYX), + _T(A32_SINT, 32_SINT, NONE, WZYX), + 
_T(L32_UINT, 32_UINT, NONE, WZYX), + _T(L32_SINT, 32_SINT, NONE, WZYX), + _T(I32_UINT, 32_UINT, NONE, WZYX), + _T(I32_SINT, 32_SINT, NONE, WZYX), + V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX), V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX), - V_(R16G16_UINT, 16_16_UINT, NONE, WZYX), - V_(R16G16_SINT, 16_16_SINT, NONE, WZYX), + VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), + VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), VT(R16G16_FLOAT, 16_16_FLOAT, NONE, WZYX), + _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), + _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), - _T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, NONE, WZYX), - _T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, NONE, WZYX), - V_(R8G8B8A8_SNORM, 8_8_8_8_SNORM, NONE, WZYX), - V_(R8G8B8A8_UINT, 8_8_8_8_UINT, NONE, WZYX), - V_(R8G8B8A8_SINT, 8_8_8_8_SINT, NONE, WZYX), + _T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), + VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), + VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX), V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX), VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), _T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), - VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, NONE, WXYZ), - _T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, NONE, WXYZ), + VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), _T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), - _T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, NONE, XYZW), - _T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, NONE, XYZW), + _T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_SRGB, 
8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), _T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), - _T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, NONE, ZYXW), - _T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, NONE, ZYXW), + _T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), - V_(R10G10B10A2_UNORM, 10_10_10_2_UNORM, NONE/*R10G10B10A2_UNORM*/, WZYX), + VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), + VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), + _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX), V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX), V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), + _T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), + _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), + _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/ @@ -177,23 +205,42 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { /* 64-bit */ V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX), V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX), - V_(R16G16B16A16_UINT, 16_16_16_16_UINT, NONE, WZYX), - V_(R16G16B16A16_SINT, 16_16_16_16_SINT, NONE, WZYX), + VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + _T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), + VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + _T(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), V_(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX), V_(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX), - VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, NONE, WZYX), - _T(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, NONE, WZYX), + VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, 
R16G16B16A16_FLOAT, WZYX), + _T(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), + VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), + V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX), + V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX), VT(R32G32_FLOAT, 32_32_FLOAT, NONE, WZYX), V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX), + _T(L32A32_UINT, 32_32_UINT, NONE, WZYX), + _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), + /* 96-bit */ + V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), + V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), + V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), /* 128-bit */ - VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, NONE, WZYX), - _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, NONE, WZYX), + VT(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + _T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), + VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + _T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX), + V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX), + VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), }; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index f2cc33b..6ba25d0 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -169,6 +169,9 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); + if (util_format_is_srgb(cso->format)) + so->texconst0 |= 
A4XX_TEX_CONST_0_SRGB; + so->texconst1 = A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: wire up integer texture sampling Message-ID: <20150422180156.7F3B5761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 13527df143be1ca6e2f69bf4728f69efaebb3b13 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=13527df143be1ca6e2f69bf4728f69efaebb3b13 Author: Rob Clark Date: Wed Apr 22 13:05:53 2015 -0400 freedreno/a4xx: wire up integer texture sampling Similar to a3xx, the compiler needs to know the return type of the sam, etc, instructions. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_context.h | 7 +++-- src/gallium/drivers/freedreno/a4xx/fd4_draw.c | 8 +++-- src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 34 +++++++++++++++++++++- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 87e69fa..384602a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -76,12 +76,15 @@ struct fd4_context { /* bitmask of sampler which needs coords clamped for vertex * shader: */ - unsigned vsaturate_s, vsaturate_t, vsaturate_r; + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; /* bitmask of sampler which needs coords clamped for frag * shader: */ - unsigned fsaturate_s, fsaturate_t, fsaturate_r; + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of integer texture samplers */ + uint16_t vinteger_s, finteger_s; /* some state changes require a different shader variant. 
Keep * track of this so we know when we need to re-emit shader state diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c index 6c54f61..ae407f7 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -82,7 +82,8 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) if (last_key->has_per_samp || key->has_per_samp) { if ((last_key->vsaturate_s != key->vsaturate_s) || (last_key->vsaturate_t != key->vsaturate_t) || - (last_key->vsaturate_r != key->vsaturate_r)) + (last_key->vsaturate_r != key->vsaturate_r) || + (last_key->vinteger_s != key->vinteger_s)) ctx->prog.dirty |= FD_SHADER_DIRTY_VP; if ((last_key->fsaturate_s != key->fsaturate_s) || @@ -121,13 +122,16 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) // TODO set .half_precision based on render target format, // ie. float16 and smaller use half, float32 use full.. .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), - .has_per_samp = fd4_ctx->fsaturate || fd4_ctx->vsaturate, + .has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate || + fd4_ctx->vinteger_s || fd4_ctx->finteger_s), .vsaturate_s = fd4_ctx->vsaturate_s, .vsaturate_t = fd4_ctx->vsaturate_t, .vsaturate_r = fd4_ctx->vsaturate_r, .fsaturate_s = fd4_ctx->fsaturate_s, .fsaturate_t = fd4_ctx->fsaturate_t, .fsaturate_r = fd4_ctx->fsaturate_r, + .vinteger_s = fd4_ctx->vinteger_s, + .finteger_s = fd4_ctx->finteger_s, }, .format = fd4_emit_format(pfb->cbufs[0]), .pformat = pipe_surface_format(pfb->cbufs[0]), diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c index 6ba25d0..ff1ff8f 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -205,11 +205,43 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, return &so->base; } +static void 
+fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader, + unsigned start, unsigned nr, struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd_texture_stateobj *tex; + uint16_t integer_s = 0, *ptr; + int i; + + fd_set_sampler_views(pctx, shader, start, nr, views); + + switch (shader) { + case PIPE_SHADER_FRAGMENT: + tex = &ctx->fragtex; + ptr = &fd4_ctx->finteger_s; + break; + case PIPE_SHADER_VERTEX: + tex = &ctx->verttex; + ptr = &fd4_ctx->vinteger_s; + break; + default: + return; + } + + for (i = 0; i < tex->num_textures; i++) + if (util_format_is_pure_integer(tex->textures[i]->format)) + integer_s |= 1 << i; + + *ptr = integer_s; +} + void fd4_texture_init(struct pipe_context *pctx) { pctx->create_sampler_state = fd4_sampler_state_create; pctx->bind_sampler_states = fd_sampler_states_bind; pctx->create_sampler_view = fd4_sampler_view_create; - pctx->set_sampler_views = fd_set_sampler_views; + pctx->set_sampler_views = fd4_set_sampler_views; } From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: primitive-restart Message-ID: <20150422180156.9636F761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a86918312320ccc2e6dafae25fd5800ef62b3710 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a86918312320ccc2e6dafae25fd5800ef62b3710 Author: Rob Clark Date: Wed Apr 22 13:08:44 2015 -0400 freedreno/a4xx: primitive-restart This was the missing bit to get dolphin-emu working on a4xx. 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 353873d..4b6eb64 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -466,10 +466,15 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, * when it changes. */ if (emit->info) { + const struct pipe_draw_info *info = emit->info; uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer) ->pc_prim_vtx_cntl; + if (info->indexed && info->primitive_restart) + val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART; + val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE); + if (fp->total_in > 0) { uint32_t varout = align(fp->total_in, 16) / 16; if (varout > 1) From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno: misc minor cleanups Message-ID: <20150422180156.ABA01761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cb24d3b7ad2f6c03edd86d827db2b308670ae8a7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb24d3b7ad2f6c03edd86d827db2b308670ae8a7 Author: Rob Clark Date: Wed Apr 22 13:11:33 2015 -0400 freedreno: misc minor cleanups Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 11 +++++------ src/gallium/drivers/freedreno/ir3/disasm-a3xx.c | 3 ++- src/gallium/drivers/freedreno/ir3/instr-a3xx.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 4a616dc..e8f5837 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -455,8 +455,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) 
memset(vinterp, 0, sizeof(vinterp)); memset(flatshade, 0, sizeof(flatshade)); - /* TODO: looks like we need to do int varyings in the frag - * shader on a4xx (no flatshad reg?): + /* looks like we need to do int varyings in the frag + * shader on a4xx (no flatshad reg? or a420.0 bug?): * * (sy)(ss)nop * (sy)ldlv.u32 r0.x,l[r0.x], 1 @@ -466,10 +466,9 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) * (rpt5)nop * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 * - * for now, don't set FLAT on vinterp[], since that - * at least works well enough for pure float impl (ie. - * pre glsl130).. we'll have to do a bit more work to - * handle this properly: + * Possibly on later a4xx variants we'll be able to use + * something like the code below instead of workaround + * in the shader: */ #if 0 /* figure out VARYING_INTERP / FLAT_SHAD register values: */ diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 22e0dc2..a5136c6 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -516,7 +516,8 @@ static void print_instr_cat6(instr_t *instr) printf("%c[", ss); print_reg_src((reg_t)(cat6->a.src1), true, false, false, cat6->a.src1_im, false, false, false); - printf("%+d", cat6->a.off); + if (cat6->a.off) + printf("%+d", cat6->a.off); if (ss) printf("]"); printf(", "); diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 98637c7..cffa62b 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -625,9 +625,10 @@ typedef union PACKED { uint32_t pad1 : 31; /* dword1: */ - uint32_t pad2 : 17; + uint32_t dst : 8; + uint32_t dummy2 : 9; uint32_t type : 3; - uint32_t pad3 : 2; + uint32_t dummy3 : 2; uint32_t opc : 5; uint32_t jmp_tgt : 1; uint32_t sync : 1; From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: 
robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno/nir: sysval fixes Message-ID: <20150422180156.8A1C2761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 632ea2a1139f4b228ca55331e411dbae9920c28d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=632ea2a1139f4b228ca55331e411dbae9920c28d Author: Rob Clark Date: Wed Apr 22 13:07:33 2015 -0400 freedreno/nir: sysval fixes Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 2 +- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 7a244d8..353873d 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -302,7 +302,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) for (i = 0; i < vp->inputs_count; i++) { uint8_t semantic = sem2name(vp->inputs[i].semantic); - if (semantic == TGSI_SEMANTIC_VERTEXID) + if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE) vertex_regid = vp->inputs[i].regid; else if (semantic == TGSI_SEMANTIC_INSTANCEID) instance_regid = vp->inputs[i].regid; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index e5f6c2a..05e7049 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1140,6 +1140,7 @@ static void add_sysval_input(struct ir3_compile *ctx, unsigned name, so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT; so->total_in++; + ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1); ctx->block->inputs[r] = instr; } @@ -1785,16 +1786,22 @@ emit_instructions(struct ir3_compile *ctx) unsigned noutputs = exec_list_length(&ctx->s->outputs) * 4; /* we need to allocate big enough outputs 
array so that - * we can stuff the kill's at the end: + * we can stuff the kill's at the end. Likewise for vtx + * shaders, we need to leave room for sysvals: */ - if (ctx->so->type == SHADER_FRAGMENT) + if (ctx->so->type == SHADER_FRAGMENT) { noutputs += ARRAY_SIZE(ctx->kill); + } else if (ctx->so->type == SHADER_VERTEX) { + ninputs += 8; + } ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs); - if (ctx->so->type == SHADER_FRAGMENT) + if (ctx->so->type == SHADER_FRAGMENT) { ctx->block->noutputs -= ARRAY_SIZE(ctx->kill); - + } else if (ctx->so->type == SHADER_VERTEX) { + ctx->block->ninputs -= 8; + } /* for fragment shader, we have a single input register (usually * r0.xy) which is used as the base for bary.f varying fetch instrs: From robclark at kemper.freedesktop.org Wed Apr 22 18:01:56 2015 From: robclark at kemper.freedesktop.org (Rob Clark) Date: Wed, 22 Apr 2015 11:01:56 -0700 (PDT) Subject: Mesa (master): freedreno/a4xx: (partial) gl_FragCoord.zw Message-ID: <20150422180156.A0BD1761EB@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1b58d8c2bf6136af2a89178f9da4e5f0631d2909 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b58d8c2bf6136af2a89178f9da4e5f0631d2909 Author: Rob Clark Date: Wed Apr 22 13:09:28 2015 -0400 freedreno/a4xx: (partial) gl_FragCoord.zw The bit to enable .z is still commented out, as it is triggering gpu hangs in 0ad. But at least gl_FragCoord.w works now, and we know what bits we are *supposed* to set for .z (with that uncommented all piglit fragcoord tests are passing). 
Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a4xx/fd4_program.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 9c4a7d9..4a616dc 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -217,7 +217,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid; - uint32_t face_regid, coord_regid; + uint32_t face_regid, coord_regid, zwcoord_regid; int constmode; int i, j, k; @@ -241,6 +241,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); + zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. 
@@ -249,7 +250,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); OUT_RING(ring, 0x00000003); - OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 4); + OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5); OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | @@ -260,14 +261,15 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | - 0xfc000000 | /* XXX */ A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | - A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid)); + A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) | + A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | 0xfcfcfc00); + OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | @@ -410,7 +412,11 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | - A4XX_RB_RENDER_CONTROL2_YCOORD)); + A4XX_RB_RENDER_CONTROL2_YCOORD | +// TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems +// to work everywhere else). 
+// A4XX_RB_RENDER_CONTROL2_ZCOORD | + A4XX_RB_RENDER_CONTROL2_WCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) | From aphogat at kemper.freedesktop.org Wed Apr 22 22:25:00 2015 From: aphogat at kemper.freedesktop.org (Anuj Phogat) Date: Wed, 22 Apr 2015 15:25:00 -0700 (PDT) Subject: Mesa (master): mesa: Fix typo in a comment Message-ID: <20150422222500.A8A5876102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2c08e3b8ea6d3f689a138d19bc812551ca966415 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2c08e3b8ea6d3f689a138d19bc812551ca966415 Author: Anuj Phogat Date: Wed Apr 22 11:23:49 2015 -0700 mesa: Fix typo in a comment Signed-off-by: Anuj Phogat Reviewed-by: Brian Paul --- src/mesa/main/texgetimage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 255d365..908bb9b 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -285,7 +285,7 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, } /* Depending on the base format involved we may need to apply a rebase - * tranaform (for example: if we download to a Luminance format we want + * transform (for example: if we download to a Luminance format we want * G=0 and B=0). */ if (baseFormat == GL_LUMINANCE || @@ -388,7 +388,7 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions, } /* Depending on the base format involved we may need to apply a rebase - * tranaform (for example: if we download to a Luminance format we want + * transform (for example: if we download to a Luminance format we want * G=0 and B=0). 
*/ if (texImage->_BaseFormat == GL_LUMINANCE || From airlied at kemper.freedesktop.org Wed Apr 22 22:34:40 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Wed, 22 Apr 2015 15:34:40 -0700 (PDT) Subject: Mesa (master): softpipe: fix stencil write to use an integer value Message-ID: <20150422223440.35AF776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8a41cd2407c1d219cba98add69609ce3a65b5435 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a41cd2407c1d219cba98add69609ce3a65b5435 Author: Dave Airlie Date: Wed Apr 22 17:13:06 2015 +1000 softpipe: fix stencil write to use an integer value This fixes a number of regressions since 61393bdcdc3b63624bf6e9730444f5e9deeedfc8 u_tile: fix stencil texturing tests under softpipe Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89960 Reviewed-by: Brian Paul Reviewed-by: Roland Scheidegger Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index f3814fd..369ab6e 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -164,7 +164,7 @@ exec_run( const struct sp_fragment_shader_variant *var, uint j; for (j = 0; j < 4; j++) - quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].u[j]; } break; } From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Remove the context parameter from brw_texture_offset Message-ID: <20150422230135.5B7D776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ceb6e5eebe13b85f57cf5a7a22371c10170943a3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ceb6e5eebe13b85f57cf5a7a22371c10170943a3 Author: Jason 
Ekstrand Date: Tue Apr 14 14:23:40 2015 -0700 i965: Remove the context parameter from brw_texture_offset It wasn't really being used anyway. We used it to assert that gpu_shader5 is supported in the back-end but that should be caught by the front-end. Signed-off-by: Jason Ekstrand Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 8 +------- src/mesa/drivers/dri/i965/brw_shader.h | 3 +-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- 5 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index e1687ed..f2f78be 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1707,7 +1707,7 @@ fs_visitor::nir_emit_texture(nir_tex_instr *instr) for (unsigned i = 0; i < 3; i++) { if (instr->const_offset[i] != 0) { assert(offset_components == 0); - tex_offset = fs_reg(brw_texture_offset(ctx, instr->const_offset, 3)); + tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3)); break; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index c66ec3e..2b8dfe4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2351,7 +2351,7 @@ fs_visitor::visit(ir_texture *ir) * offset, and a non-constant offset. 
*/ offset_value = - fs_reg(brw_texture_offset(ctx, const_offset->value.i, + fs_reg(brw_texture_offset(const_offset->value.i, const_offset->type->vector_elements)); } else { ir->offset->accept(this); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d0d5cf9..91ffb53 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -393,14 +393,8 @@ brw_math_function(enum opcode op) } uint32_t -brw_texture_offset(struct gl_context *ctx, int *offsets, - unsigned num_components) +brw_texture_offset(int *offsets, unsigned num_components) { - /* If the driver does not support GL_ARB_gpu_shader5, the offset - * must be constant. - */ - assert(offsets != NULL || ctx->Extensions.ARB_gpu_shader5); - if (!offsets) return 0; /* nonconstant offset; caller will handle it. */ /* Combine all three offsets into a single unsigned dword: diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 8a3263e..e6246ed 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -206,8 +206,7 @@ public: virtual void invalidate_live_intervals() = 0; }; -uint32_t brw_texture_offset(struct gl_context *ctx, int *offsets, - unsigned num_components); +uint32_t brw_texture_offset(int *offsets, unsigned num_components); #endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 3d16caa..20e6109 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2630,7 +2630,7 @@ vec4_visitor::visit(ir_texture *ir) if (ir->offset != NULL && !has_nonconstant_offset) { inst->offset = - brw_texture_offset(ctx, ir->offset->as_constant()->value.i, + brw_texture_offset(ir->offset->as_constant()->value.i, ir->offset->type->vector_elements); } From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 
From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965/eu: Add a devinfo parameter to brw_compile Message-ID: <20150422230135.6F4E376104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6219a8f098f4e18ff974db380e97a351c8e78906 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6219a8f098f4e18ff974db380e97a351c8e78906 Author: Jason Ekstrand Date: Tue Apr 14 16:57:24 2015 -0700 i965/eu: Add a devinfo parameter to brw_compile Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu.c | 1 + src/mesa/drivers/dri/i965/brw_eu.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index c21d14d..ccd659e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -218,6 +218,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) memset(p, 0, sizeof(*p)); p->brw = brw; + p->devinfo = brw->intelScreen->devinfo; /* * Set the initial instruction store array size to 1024, if found that * isn't enough, then it will double the store size at brw_next_insn() diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 31c1492..dc62cf0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -70,6 +70,7 @@ struct brw_compile { bool single_program_flow; bool compressed; struct brw_context *brw; + const struct brw_device_info *devinfo; /* Control flow stacks: * - if_stack contains IF and ELSE instructions which must be patched From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Do better fake context setup in unit tests Message-ID: <20150422230135.64A5576103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 
a921475c22fe88fb6921a1f628d3ea05d55ba43b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a921475c22fe88fb6921a1f628d3ea05d55ba43b Author: Jason Ekstrand Date: Fri Apr 17 15:23:18 2015 -0700 i965: Do better fake context setup in unit tests In future tests, we will start relying on devinfo and not just brw in the compiler. Changing this now keeps these tests from failing in the future. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/test_eu_compact.c | 5 ++++- src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 6 +++++- src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp | 6 +++++- src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp | 6 +++++- src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp | 6 +++++- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c index d95d2c0..f461654 100644 --- a/src/mesa/drivers/dri/i965/test_eu_compact.c +++ b/src/mesa/drivers/dri/i965/test_eu_compact.c @@ -291,7 +291,10 @@ int main(int argc, char **argv) { struct brw_context *brw = calloc(1, sizeof(*brw)); - brw->gen = 6; + struct brw_device_info *devinfo = calloc(1, sizeof(*devinfo)); + brw->intelScreen = calloc(1, sizeof(*brw->intelScreen)); + brw->intelScreen->devinfo = devinfo; + brw->gen = devinfo->gen = 6; bool fail = false; for (brw->gen = 6; brw->gen <= 7; brw->gen++) { diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index ed8744d..206a76e 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -31,6 +31,7 @@ class cmod_propagation_test : public ::testing::Test { public: struct brw_context *brw; + struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; struct gl_shader_program *shader_prog; @@ -51,6 +52,9 @@ public: void cmod_propagation_test::SetUp() { brw 
= (struct brw_context *)calloc(1, sizeof(*brw)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); + brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); + brw->intelScreen->devinfo = devinfo; ctx = &brw->ctx; fp = ralloc(NULL, struct brw_fragment_program); @@ -61,7 +65,7 @@ void cmod_propagation_test::SetUp() _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = 4; + brw->gen = devinfo->gen = 4; } static fs_inst * diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 6f762bc..4c91af3 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -31,6 +31,7 @@ class saturate_propagation_test : public ::testing::Test { public: struct brw_context *brw; + struct brw_device_info *devinfo; struct gl_context *ctx; struct brw_wm_prog_data *prog_data; struct gl_shader_program *shader_prog; @@ -51,6 +52,9 @@ public: void saturate_propagation_test::SetUp() { brw = (struct brw_context *)calloc(1, sizeof(*brw)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); + brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); + brw->intelScreen->devinfo = devinfo; ctx = &brw->ctx; fp = ralloc(NULL, struct brw_fragment_program); @@ -61,7 +65,7 @@ void saturate_propagation_test::SetUp() _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); - brw->gen = 4; + brw->gen = devinfo->gen = 4; } static fs_inst * diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index f9e4ce1..2ef52e9 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -34,6 +34,7 @@ class copy_propagation_test : public ::testing::Test { public: struct brw_context *brw; + struct 
brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; struct brw_vertex_program *vp; @@ -93,6 +94,9 @@ protected: void copy_propagation_test::SetUp() { brw = (struct brw_context *)calloc(1, sizeof(*brw)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); + brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); + brw->intelScreen->devinfo = devinfo; ctx = &brw->ctx; vp = ralloc(NULL, struct brw_vertex_program); @@ -103,7 +107,7 @@ void copy_propagation_test::SetUp() _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = 4; + brw->gen = devinfo->gen = 4; } static void diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 17bece5..c8c6757 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -36,6 +36,7 @@ class register_coalesce_test : public ::testing::Test { public: struct brw_context *brw; + struct brw_device_info *devinfo; struct gl_context *ctx; struct gl_shader_program *shader_prog; struct brw_vertex_program *vp; @@ -96,6 +97,9 @@ protected: void register_coalesce_test::SetUp() { brw = (struct brw_context *)calloc(1, sizeof(*brw)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*brw)); + brw->intelScreen = (struct intel_screen *)calloc(1, sizeof(*brw->intelScreen)); + brw->intelScreen->devinfo = devinfo; ctx = &brw->ctx; vp = ralloc(NULL, struct brw_vertex_program); @@ -106,7 +110,7 @@ void register_coalesce_test::SetUp() _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); - brw->gen = 4; + brw->gen = devinfo->gen = 4; } static void From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Make instruction compaction take a device_info 
instead of a context Message-ID: <20150422230135.91F5476102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c3e5f32840fbc7b44a15b2c7c7d7299cbd6d332a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3e5f32840fbc7b44a15b2c7c7d7299cbd6d332a Author: Jason Ekstrand Date: Wed Apr 15 13:19:21 2015 -0700 i965: Make instruction compaction take a device_info instead of a context Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu.c | 2 +- src/mesa/drivers/dri/i965/brw_eu.h | 12 +- src/mesa/drivers/dri/i965/brw_eu_compact.c | 203 ++++++++++++++------------- src/mesa/drivers/dri/i965/test_eu_compact.c | 4 +- 4 files changed, 112 insertions(+), 109 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b98a089..deeb4fe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -282,7 +282,7 @@ brw_disassemble(struct brw_context *brw, ((uint32_t *)insn)[0]); } - brw_uncompact_instruction(brw, &uncompacted, compacted); + brw_uncompact_instruction(brw->intelScreen->devinfo, &uncompacted, compacted); insn = &uncompacted; offset += 8; } else { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 1bb5686..e65c270 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -466,13 +466,13 @@ enum brw_conditional_mod brw_swap_cmod(uint32_t cmod); void brw_init_compaction_tables(struct brw_context *brw); void brw_compact_instructions(struct brw_compile *p, int start_offset, int num_annotations, struct annotation *annotation); -void brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst, - brw_compact_inst *src); -bool brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, - brw_inst *src); +void brw_uncompact_instruction(const struct brw_device_info *devinfo, + brw_inst *dst, brw_compact_inst *src); +bool brw_try_compact_instruction(const struct brw_device_info *devinfo, + 
brw_compact_inst *dst, brw_inst *src); -void brw_debug_compact_uncompact(struct brw_context *brw, brw_inst *orig, - brw_inst *uncompacted); +void brw_debug_compact_uncompact(struct brw_context *brw, + brw_inst *orig, brw_inst *uncompacted); static inline int next_offset(const struct brw_device_info *devinfo, void *store, int offset) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 23aac0a..428cdf4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -670,9 +670,10 @@ static const uint16_t *subreg_table; static const uint16_t *src_index_table; static bool -set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +set_control_index(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { - uint32_t uncompacted = brw->gen >= 8 /* 17b/G45; 19b/IVB+ */ + uint32_t uncompacted = devinfo->gen >= 8 /* 17b/G45; 19b/IVB+ */ ? (brw_inst_bits(src, 33, 31) << 16) | /* 3b */ (brw_inst_bits(src, 23, 12) << 4) | /* 12b */ (brw_inst_bits(src, 10, 9) << 2) | /* 2b */ @@ -684,7 +685,7 @@ set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) /* On gen7, the flag register and subregister numbers are integrated into * the control index. */ - if (brw->gen == 7) + if (devinfo->gen == 7) uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */ for (int i = 0; i < 32; i++) { @@ -698,10 +699,10 @@ set_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) } static bool -set_datatype_index(struct brw_context *brw, brw_compact_inst *dst, +set_datatype_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, brw_inst *src) { - uint32_t uncompacted = brw->gen >= 8 /* 18b/G45+; 21b/BDW+ */ + uint32_t uncompacted = devinfo->gen >= 8 /* 18b/G45+; 21b/BDW+ */ ? 
(brw_inst_bits(src, 63, 61) << 18) | /* 3b */ (brw_inst_bits(src, 94, 89) << 12) | /* 6b */ (brw_inst_bits(src, 46, 35)) /* 12b */ @@ -719,8 +720,8 @@ set_datatype_index(struct brw_context *brw, brw_compact_inst *dst, } static bool -set_subreg_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src, - bool is_immediate) +set_subreg_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, + brw_inst *src, bool is_immediate) { uint16_t uncompacted = /* 15b */ (brw_inst_bits(src, 52, 48) << 0) | /* 5b */ @@ -754,7 +755,8 @@ get_src_index(uint16_t uncompacted, } static bool -set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +set_src0_index(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { uint16_t compacted; uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */ @@ -768,13 +770,13 @@ set_src0_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) } static bool -set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src, - bool is_immediate) +set_src1_index(const struct brw_device_info *devinfo, brw_compact_inst *dst, + brw_inst *src, bool is_immediate) { uint16_t compacted; if (is_immediate) { - compacted = (brw_inst_imm_ud(brw->intelScreen->devinfo, src) >> 8) & 0x1f; + compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f; } else { uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */ @@ -788,15 +790,16 @@ set_src1_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src, } static bool -set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +set_3src_control_index(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); uint32_t uncompacted = /* 24b/BDW; 26b/CHV */ (brw_inst_bits(src, 34, 32) << 21) | /* 3b */ (brw_inst_bits(src, 28, 8)); /* 21b */ - if (brw->gen >= 9 || brw->is_cherryview) + if (devinfo->gen >= 9 || 
devinfo->is_cherryview) uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */ for (int i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) { @@ -810,9 +813,10 @@ set_3src_control_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst } static bool -set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) +set_3src_source_index(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); uint64_t uncompacted = /* 46b/BDW; 49b/CHV */ (brw_inst_bits(src, 83, 83) << 43) | /* 1b */ @@ -821,7 +825,7 @@ set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst * (brw_inst_bits(src, 72, 65) << 19) | /* 8b */ (brw_inst_bits(src, 55, 37)); /* 19b */ - if (brw->gen >= 9 || brw->is_cherryview) { + if (devinfo->gen >= 9 || devinfo->is_cherryview) { uncompacted |= (brw_inst_bits(src, 126, 125) << 47) | /* 2b */ (brw_inst_bits(src, 105, 104) << 45) | /* 2b */ @@ -843,7 +847,7 @@ set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst * } static bool -has_unmapped_bits(struct brw_context *brw, brw_inst *src) +has_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) { /* Check for instruction bits that don't map to any of the fields of the * compacted instruction. 
The instruction cannot be compacted if any of @@ -854,31 +858,31 @@ has_unmapped_bits(struct brw_context *brw, brw_inst *src) * - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8) * - UIP[31] (bit 95 on Gen8) */ - if (brw->gen >= 8) { + if (devinfo->gen >= 8) { assert(!brw_inst_bits(src, 7, 7)); return brw_inst_bits(src, 95, 95) || brw_inst_bits(src, 47, 47) || brw_inst_bits(src, 11, 11); } else { assert(!brw_inst_bits(src, 7, 7) && - !(brw->gen < 7 && brw_inst_bits(src, 90, 90))); + !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90))); return brw_inst_bits(src, 95, 91) || brw_inst_bits(src, 47, 47); } } static bool -has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) +has_3src_unmapped_bits(const struct brw_device_info *devinfo, brw_inst *src) { /* Check for three-source instruction bits that don't map to any of the * fields of the compacted instruction. All of them seem to be reserved * bits currently. */ - if (brw->gen >= 9 || brw->is_cherryview) { + if (devinfo->gen >= 9 || devinfo->is_cherryview) { assert(!brw_inst_bits(src, 127, 127) && !brw_inst_bits(src, 7, 7)); } else { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); assert(!brw_inst_bits(src, 127, 126) && !brw_inst_bits(src, 105, 105) && !brw_inst_bits(src, 84, 84) && @@ -890,24 +894,23 @@ has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) } static bool -brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst, - brw_inst *src) +brw_try_compact_3src_instruction(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); - if (has_3src_unmapped_bits(brw, src)) + if (has_3src_unmapped_bits(devinfo, src)) return false; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; #define compact(field) \ brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(devinfo, src)) compact(opcode); - if (!set_3src_control_index(brw, dst, src)) + if (!set_3src_control_index(devinfo, 
dst, src)) return false; - if (!set_3src_source_index(brw, dst, src)) + if (!set_3src_source_index(devinfo, dst, src)) return false; compact(dst_reg_nr); @@ -959,18 +962,17 @@ is_3src(uint32_t op) * brw_compact_instructions(). */ bool -brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, - brw_inst *src) +brw_try_compact_instruction(const struct brw_device_info *devinfo, + brw_compact_inst *dst, brw_inst *src) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; brw_compact_inst temp; assert(brw_inst_cmpt_control(devinfo, src) == 0); if (is_3src(brw_inst_opcode(devinfo, src))) { - if (brw->gen >= 8) { + if (devinfo->gen >= 8) { memset(&temp, 0, sizeof(temp)); - if (brw_try_compact_3src_instruction(brw, &temp, src)) { + if (brw_try_compact_3src_instruction(devinfo, &temp, src)) { *dst = temp; return true; } else { @@ -985,34 +987,35 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE || brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE; if (is_immediate && - (brw->gen < 6 || !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) { + (devinfo->gen < 6 || + !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) { return false; } - if (has_unmapped_bits(brw, src)) + if (has_unmapped_bits(devinfo, src)) return false; memset(&temp, 0, sizeof(temp)); brw_compact_inst_set_opcode(&temp, brw_inst_opcode(devinfo, src)); brw_compact_inst_set_debug_control(&temp, brw_inst_debug_control(devinfo, src)); - if (!set_control_index(brw, &temp, src)) + if (!set_control_index(devinfo, &temp, src)) return false; - if (!set_datatype_index(brw, &temp, src)) + if (!set_datatype_index(devinfo, &temp, src)) return false; - if (!set_subreg_index(brw, &temp, src, is_immediate)) + if (!set_subreg_index(devinfo, &temp, src, is_immediate)) return false; brw_compact_inst_set_acc_wr_control(&temp, brw_inst_acc_wr_control(devinfo, src)); 
brw_compact_inst_set_cond_modifier(&temp, brw_inst_cond_modifier(devinfo, src)); - if (brw->gen <= 6) + if (devinfo->gen <= 6) brw_compact_inst_set_flag_subreg_nr(&temp, brw_inst_flag_subreg_nr(devinfo, src)); brw_compact_inst_set_cmpt_control(&temp, true); - if (!set_src0_index(brw, &temp, src)) + if (!set_src0_index(devinfo, &temp, src)) return false; - if (!set_src1_index(brw, &temp, src, is_immediate)) + if (!set_src1_index(devinfo, &temp, src, is_immediate)) return false; brw_compact_inst_set_dst_reg_nr(&temp, brw_inst_dst_da_reg_nr(devinfo, src)); brw_compact_inst_set_src0_reg_nr(&temp, brw_inst_src0_da_reg_nr(devinfo, src)); @@ -1030,13 +1033,13 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, } static void -set_uncompacted_control(struct brw_context *brw, brw_inst *dst, +set_uncompacted_control(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { uint32_t uncompacted = control_index_table[brw_compact_inst_control_index(src)]; - if (brw->gen >= 8) { + if (devinfo->gen >= 8) { brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16)); brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff); brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3); @@ -1046,18 +1049,18 @@ set_uncompacted_control(struct brw_context *brw, brw_inst *dst, brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1); brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff)); - if (brw->gen == 7) + if (devinfo->gen == 7) brw_inst_set_bits(dst, 90, 89, uncompacted >> 17); } } static void -set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst, +set_uncompacted_datatype(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { uint32_t uncompacted = datatype_table[brw_compact_inst_datatype_index(src)]; - if (brw->gen >= 8) { + if (devinfo->gen >= 8) { brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18)); brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f); brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) 
& 0xfff); @@ -1068,7 +1071,7 @@ set_uncompacted_datatype(struct brw_context *brw, brw_inst *dst, } static void -set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst, +set_uncompacted_subreg(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { uint16_t uncompacted = subreg_table[brw_compact_inst_subreg_index(src)]; @@ -1079,7 +1082,7 @@ set_uncompacted_subreg(struct brw_context *brw, brw_inst *dst, } static void -set_uncompacted_src0(struct brw_context *brw, brw_inst *dst, +set_uncompacted_src0(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { uint32_t compacted = brw_compact_inst_src0_index(src); @@ -1089,13 +1092,13 @@ set_uncompacted_src0(struct brw_context *brw, brw_inst *dst, } static void -set_uncompacted_src1(struct brw_context *brw, brw_inst *dst, +set_uncompacted_src1(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src, bool is_immediate) { if (is_immediate) { signed high5 = brw_compact_inst_src1_index(src); /* Replicate top bit of src1_index into high 20 bits of the immediate. 
*/ - brw_inst_set_imm_ud(brw->intelScreen->devinfo, dst, (high5 << 27) >> 19); + brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19); } else { uint16_t uncompacted = src_index_table[brw_compact_inst_src1_index(src)]; @@ -1104,10 +1107,10 @@ set_uncompacted_src1(struct brw_context *brw, brw_inst *dst, } static void -set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst, - brw_compact_inst *src) +set_uncompacted_3src_control_index(const struct brw_device_info *devinfo, + brw_inst *dst, brw_compact_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); uint32_t compacted = brw_compact_inst_3src_control_index(src); uint32_t uncompacted = gen8_3src_control_index_table[compacted]; @@ -1115,15 +1118,15 @@ set_uncompacted_3src_control_index(struct brw_context *brw, brw_inst *dst, brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7); brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff); - if (brw->gen >= 9 || brw->is_cherryview) + if (devinfo->gen >= 9 || devinfo->is_cherryview) brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3); } static void -set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst, - brw_compact_inst *src) +set_uncompacted_3src_source_index(const struct brw_device_info *devinfo, + brw_inst *dst, brw_compact_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); uint32_t compacted = brw_compact_inst_3src_source_index(src); uint64_t uncompacted = gen8_3src_source_index_table[compacted]; @@ -1134,7 +1137,7 @@ set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst, brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff); brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff); - if (brw->gen >= 9 || brw->is_cherryview) { + if (devinfo->gen >= 9 || devinfo->is_cherryview) { brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3); brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3); brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1); 
@@ -1145,19 +1148,18 @@ set_uncompacted_3src_source_index(struct brw_context *brw, brw_inst *dst, } static void -brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst, - brw_compact_inst *src) +brw_uncompact_3src_instruction(const struct brw_device_info *devinfo, + brw_inst *dst, brw_compact_inst *src) { - assert(brw->gen >= 8); + assert(devinfo->gen >= 8); - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; #define uncompact(field) \ brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(src)) uncompact(opcode); - set_uncompacted_3src_control_index(brw, dst, src); - set_uncompacted_3src_source_index(brw, dst, src); + set_uncompacted_3src_control_index(devinfo, dst, src); + set_uncompacted_3src_source_index(devinfo, dst, src); uncompact(dst_reg_nr); uncompact(src0_rep_ctrl); @@ -1177,35 +1179,34 @@ brw_uncompact_3src_instruction(struct brw_context *brw, brw_inst *dst, } void -brw_uncompact_instruction(struct brw_context *brw, brw_inst *dst, +brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst, brw_compact_inst *src) { memset(dst, 0, sizeof(*dst)); - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; - if (brw->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) { - brw_uncompact_3src_instruction(brw, dst, src); + if (devinfo->gen >= 8 && is_3src(brw_compact_inst_3src_opcode(src))) { + brw_uncompact_3src_instruction(devinfo, dst, src); return; } brw_inst_set_opcode(devinfo, dst, brw_compact_inst_opcode(src)); brw_inst_set_debug_control(devinfo, dst, brw_compact_inst_debug_control(src)); - set_uncompacted_control(brw, dst, src); - set_uncompacted_datatype(brw, dst, src); + set_uncompacted_control(devinfo, dst, src); + set_uncompacted_datatype(devinfo, dst, src); /* src0/1 register file fields are in the datatype table. 
*/ bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE || brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE; - set_uncompacted_subreg(brw, dst, src); + set_uncompacted_subreg(devinfo, dst, src); brw_inst_set_acc_wr_control(devinfo, dst, brw_compact_inst_acc_wr_control(src)); brw_inst_set_cond_modifier(devinfo, dst, brw_compact_inst_cond_modifier(src)); - if (brw->gen <= 6) + if (devinfo->gen <= 6) brw_inst_set_flag_subreg_nr(devinfo, dst, brw_compact_inst_flag_subreg_nr(src)); - set_uncompacted_src0(brw, dst, src); - set_uncompacted_src1(brw, dst, src, is_immediate); + set_uncompacted_src0(devinfo, dst, src); + set_uncompacted_src1(devinfo, dst, src, is_immediate); brw_inst_set_dst_da_reg_nr(devinfo, dst, brw_compact_inst_dst_reg_nr(src)); brw_inst_set_src0_da_reg_nr(devinfo, dst, brw_compact_inst_src0_reg_nr(src)); if (is_immediate) { @@ -1254,15 +1255,14 @@ compacted_between(int old_ip, int old_target_ip, int *compacted_counts) } static void -update_uip_jip(struct brw_context *brw, brw_inst *insn, +update_uip_jip(const struct brw_device_info *devinfo, brw_inst *insn, int this_old_ip, int *compacted_counts) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; /* JIP and UIP are in units of: * - bytes on Gen8+; and * - compacted instructions on Gen6+. */ - int shift = brw->gen >= 8 ? 3 : 0; + int shift = devinfo->gen >= 8 ? 
3 : 0; int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift; jip_compacted -= compacted_between(this_old_ip, @@ -1272,7 +1272,7 @@ update_uip_jip(struct brw_context *brw, brw_inst *insn, if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF || brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE || - (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && brw->gen <= 7)) + (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7)) return; int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift; @@ -1283,17 +1283,16 @@ update_uip_jip(struct brw_context *brw, brw_inst *insn, } static void -update_gen4_jump_count(struct brw_context *brw, brw_inst *insn, +update_gen4_jump_count(const struct brw_device_info *devinfo, brw_inst *insn, int this_old_ip, int *compacted_counts) { - assert(brw->gen == 5 || brw->is_g4x); + assert(devinfo->gen == 5 || devinfo->is_g4x); - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; /* Jump Count is in units of: * - uncompacted instructions on G45; and * - compacted instructions on Gen5. */ - int shift = brw->is_g4x ? 1 : 0; + int shift = devinfo->is_g4x ? 
1 : 0; int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift; @@ -1379,7 +1378,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, */ int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst)]; - if (brw->gen == 4 && !brw->is_g4x) + if (devinfo->gen == 4 && !devinfo->is_g4x) return; int offset = 0; @@ -1394,12 +1393,12 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, brw_inst saved = *src; - if (brw_try_compact_instruction(brw, dst, src)) { + if (brw_try_compact_instruction(devinfo, dst, src)) { compacted_count++; if (INTEL_DEBUG) { brw_inst uncompacted; - brw_uncompact_instruction(brw, &uncompacted, dst); + brw_uncompact_instruction(devinfo, &uncompacted, dst); if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { brw_debug_compact_uncompact(brw, &saved, &uncompacted); } @@ -1415,11 +1414,12 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, (((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND || brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC) && brw_inst_eot(devinfo, src)) || - brw->is_g4x)) { + devinfo->is_g4x)) { brw_compact_inst *align = store + offset; memset(align, 0, sizeof(*align)); - brw_compact_inst_set_opcode(align, brw->is_g4x ? BRW_OPCODE_NENOP : - BRW_OPCODE_NOP); + brw_compact_inst_set_opcode(align, + devinfo->is_g4x ? 
BRW_OPCODE_NENOP : + BRW_OPCODE_NOP); brw_compact_inst_set_cmpt_control(align, true); offset += sizeof(brw_compact_inst); compacted_count--; @@ -1451,10 +1451,11 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, case BRW_OPCODE_BREAK: case BRW_OPCODE_CONTINUE: case BRW_OPCODE_HALT: - if (brw->gen >= 6) { - update_uip_jip(brw, insn, this_old_ip, compacted_counts); + if (devinfo->gen >= 6) { + update_uip_jip(devinfo, insn, this_old_ip, compacted_counts); } else { - update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts); + update_gen4_jump_count(devinfo, insn, this_old_ip, + compacted_counts); } break; @@ -1463,22 +1464,23 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_WHILE: - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { if (brw_inst_cmpt_control(devinfo, insn)) { brw_inst uncompacted; - brw_uncompact_instruction(brw, &uncompacted, + brw_uncompact_instruction(devinfo, &uncompacted, (brw_compact_inst *)insn); - update_uip_jip(brw, &uncompacted, this_old_ip, compacted_counts); + update_uip_jip(devinfo, &uncompacted, this_old_ip, + compacted_counts); - bool ret = brw_try_compact_instruction(brw, + bool ret = brw_try_compact_instruction(devinfo, (brw_compact_inst *)insn, &uncompacted); assert(ret); (void)ret; } else { - update_uip_jip(brw, insn, this_old_ip, compacted_counts); + update_uip_jip(devinfo, insn, this_old_ip, compacted_counts); } - } else if (brw->gen == 6) { + } else if (devinfo->gen == 6) { assert(!brw_inst_cmpt_control(devinfo, insn)); /* Jump Count is in units of compacted instructions on Gen6. 
*/ @@ -1489,7 +1491,8 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, jump_count_compacted -= (target_compacted_count - this_compacted_count); brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted); } else { - update_gen4_jump_count(brw, insn, this_old_ip, compacted_counts); + update_gen4_jump_count(devinfo, insn, this_old_ip, + compacted_counts); } break; diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c index 71631dc..3990aa3 100644 --- a/src/mesa/drivers/dri/i965/test_eu_compact.c +++ b/src/mesa/drivers/dri/i965/test_eu_compact.c @@ -36,10 +36,10 @@ test_compact_instruction(struct brw_compile *p, brw_inst src) brw_compact_inst dst; memset(&dst, 0xd0, sizeof(dst)); - if (brw_try_compact_instruction(brw, &dst, &src)) { + if (brw_try_compact_instruction(p->devinfo, &dst, &src)) { brw_inst uncompacted; - brw_uncompact_instruction(brw, &uncompacted, &dst); + brw_uncompact_instruction(p->devinfo, &uncompacted, &dst); if (memcmp(&uncompacted, &src, sizeof(src))) { brw_debug_compact_uncompact(brw, &src, &uncompacted); return false; From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Make the disassembler take a device_info instead of a context Message-ID: <20150422230135.A0DFB76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 639314d40e78b5b56c3fc840b2f416e7fc519a4d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=639314d40e78b5b56c3fc840b2f416e7fc519a4d Author: Jason Ekstrand Date: Wed Apr 15 13:46:21 2015 -0700 i965: Make the disassembler take a device_info instead of a context Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_clip.c | 3 +- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/brw_disasm.c | 162 ++++++++++------------ src/mesa/drivers/dri/i965/brw_eu.c | 8 +- 
src/mesa/drivers/dri/i965/brw_eu.h | 4 +- src/mesa/drivers/dri/i965/brw_eu_compact.c | 11 +- src/mesa/drivers/dri/i965/brw_ff_gs.c | 3 +- src/mesa/drivers/dri/i965/brw_sf.c | 3 +- src/mesa/drivers/dri/i965/brw_state_dump.c | 4 +- src/mesa/drivers/dri/i965/intel_asm_annotation.c | 2 +- src/mesa/drivers/dri/i965/test_eu_compact.c | 6 +- 11 files changed, 99 insertions(+), 109 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index de78f46..07b10a2 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -117,7 +117,8 @@ static void compile_clip_prog( struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { fprintf(stderr, "clip:\n"); - brw_disassemble(brw, c.func.store, 0, program_size, stderr); + brw_disassemble(brw->intelScreen->devinfo, c.func.store, + 0, program_size, stderr); fprintf(stderr, "\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a6d6787..80556ea 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1622,7 +1622,7 @@ void brw_fs_alloc_reg_sets(struct intel_screen *screen); void brw_vec4_alloc_reg_set(struct intel_screen *screen); /* brw_disasm.c */ -int brw_disassemble_inst(FILE *file, struct brw_context *brw, +int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, struct brw_inst *inst, bool is_compacted); /* brw_vs.c */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 873ae86..d1078c0 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -103,9 +103,9 @@ const struct opcode_desc opcode_descs[128] = { }; static bool -has_jip(struct brw_context *brw, enum opcode opcode) +has_jip(const struct brw_device_info *devinfo, enum opcode opcode) { - if (brw->gen < 6) + if (devinfo->gen < 6) return false; return opcode == BRW_OPCODE_IF || 
@@ -118,22 +118,22 @@ has_jip(struct brw_context *brw, enum opcode opcode) } static bool -has_uip(struct brw_context *brw, enum opcode opcode) +has_uip(const struct brw_device_info *devinfo, enum opcode opcode) { - if (brw->gen < 6) + if (devinfo->gen < 6) return false; - return (brw->gen >= 7 && opcode == BRW_OPCODE_IF) || - (brw->gen >= 8 && opcode == BRW_OPCODE_ELSE) || + return (devinfo->gen >= 7 && opcode == BRW_OPCODE_IF) || + (devinfo->gen >= 8 && opcode == BRW_OPCODE_ELSE) || opcode == BRW_OPCODE_BREAK || opcode == BRW_OPCODE_CONTINUE || opcode == BRW_OPCODE_HALT; } static bool -has_branch_ctrl(struct brw_context *brw, enum opcode opcode) +has_branch_ctrl(const struct brw_device_info *devinfo, enum opcode opcode) { - if (brw->gen < 8) + if (devinfo->gen < 8) return false; return opcode == BRW_OPCODE_IF || @@ -718,10 +718,9 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) } static int -dest(FILE *file, struct brw_context *brw, brw_inst *inst) +dest(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { int err = 0; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { @@ -776,13 +775,12 @@ dest(FILE *file, struct brw_context *brw, brw_inst *inst) } static int -dest_3src(FILE *file, struct brw_context *brw, brw_inst *inst) +dest_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { int err = 0; uint32_t reg_file; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; - if (brw->gen == 6 && brw_inst_3src_dst_reg_file(devinfo, inst)) + if (devinfo->gen == 6 && brw_inst_3src_dst_reg_file(devinfo, inst)) reg_file = BRW_MESSAGE_REGISTER_FILE; else reg_file = BRW_GENERAL_REGISTER_FILE; @@ -819,7 +817,7 @@ src_align1_region(FILE *file, static int src_da1(FILE *file, - const struct brw_context *brw, + const struct brw_device_info *devinfo, unsigned opcode, unsigned type, 
unsigned _reg_file, unsigned _vert_stride, unsigned _width, unsigned _horiz_stride, @@ -828,7 +826,7 @@ src_da1(FILE *file, { int err = 0; - if (brw->gen >= 8 && is_logic_instruction(opcode)) + if (devinfo->gen >= 8 && is_logic_instruction(opcode)) err |= control(file, "bitnot", m_bitnot, _negate, NULL); else err |= control(file, "negate", m_negate, _negate, NULL); @@ -847,7 +845,7 @@ src_da1(FILE *file, static int src_ia1(FILE *file, - const struct brw_context *brw, + const struct brw_device_info *devinfo, unsigned opcode, unsigned type, unsigned _reg_file, @@ -860,7 +858,7 @@ src_ia1(FILE *file, { int err = 0; - if (brw->gen >= 8 && is_logic_instruction(opcode)) + if (devinfo->gen >= 8 && is_logic_instruction(opcode)) err |= control(file, "bitnot", m_bitnot, _negate, NULL); else err |= control(file, "negate", m_negate, _negate, NULL); @@ -902,7 +900,7 @@ src_swizzle(FILE *file, unsigned swiz) static int src_da16(FILE *file, - const struct brw_context *brw, + const struct brw_device_info *devinfo, unsigned opcode, unsigned _reg_type, unsigned _reg_file, @@ -915,7 +913,7 @@ src_da16(FILE *file, { int err = 0; - if (brw->gen >= 8 && is_logic_instruction(opcode)) + if (devinfo->gen >= 8 && is_logic_instruction(opcode)) err |= control(file, "bitnot", m_bitnot, _negate, NULL); else err |= control(file, "negate", m_negate, _negate, NULL); @@ -938,10 +936,9 @@ src_da16(FILE *file, } static int -src0_3src(FILE *file, struct brw_context *brw, brw_inst *inst) +src0_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { int err = 0; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; unsigned src0_subreg_nr = brw_inst_3src_src0_subreg_nr(devinfo, inst); err |= control(file, "negate", m_negate, @@ -965,10 +962,9 @@ src0_3src(FILE *file, struct brw_context *brw, brw_inst *inst) } static int -src1_3src(FILE *file, struct brw_context *brw, brw_inst *inst) +src1_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { int err = 
0; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; unsigned src1_subreg_nr = brw_inst_3src_src1_subreg_nr(devinfo, inst); err |= control(file, "negate", m_negate, @@ -993,10 +989,9 @@ src1_3src(FILE *file, struct brw_context *brw, brw_inst *inst) static int -src2_3src(FILE *file, struct brw_context *brw, brw_inst *inst) +src2_3src(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { int err = 0; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; unsigned src2_subreg_nr = brw_inst_3src_src2_subreg_nr(devinfo, inst); err |= control(file, "negate", m_negate, @@ -1020,9 +1015,8 @@ src2_3src(FILE *file, struct brw_context *brw, brw_inst *inst) } static int -imm(FILE *file, struct brw_context *brw, unsigned type, brw_inst *inst) +imm(FILE *file, const struct brw_device_info *devinfo, unsigned type, brw_inst *inst) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; switch (type) { case BRW_HW_REG_TYPE_UD: format(file, "0x%08xUD", brw_inst_imm_ud(devinfo, inst)); @@ -1063,15 +1057,14 @@ imm(FILE *file, struct brw_context *brw, unsigned type, brw_inst *inst) } static int -src0(FILE *file, struct brw_context *brw, brw_inst *inst) +src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { - return imm(file, brw, brw_inst_src0_reg_type(devinfo, inst), inst); + return imm(file, devinfo, brw_inst_src0_reg_type(devinfo, inst), inst); } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { return src_da1(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src0_reg_type(devinfo, inst), brw_inst_src0_reg_file(devinfo, inst), @@ -1084,7 +1077,7 @@ src0(FILE *file, struct brw_context *brw, brw_inst *inst) brw_inst_src0_negate(devinfo, inst)); } else { return 
src_ia1(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src0_reg_type(devinfo, inst), brw_inst_src0_reg_file(devinfo, inst), @@ -1100,7 +1093,7 @@ src0(FILE *file, struct brw_context *brw, brw_inst *inst) } else { if (brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { return src_da16(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src0_reg_type(devinfo, inst), brw_inst_src0_reg_file(devinfo, inst), @@ -1121,15 +1114,14 @@ src0(FILE *file, struct brw_context *brw, brw_inst *inst) } static int -src1(FILE *file, struct brw_context *brw, brw_inst *inst) +src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { - return imm(file, brw, brw_inst_src1_reg_type(devinfo, inst), inst); + return imm(file, devinfo, brw_inst_src1_reg_type(devinfo, inst), inst); } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { if (brw_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { return src_da1(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src1_reg_type(devinfo, inst), brw_inst_src1_reg_file(devinfo, inst), @@ -1142,7 +1134,7 @@ src1(FILE *file, struct brw_context *brw, brw_inst *inst) brw_inst_src1_negate(devinfo, inst)); } else { return src_ia1(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src1_reg_type(devinfo, inst), brw_inst_src1_reg_file(devinfo, inst), @@ -1158,7 +1150,7 @@ src1(FILE *file, struct brw_context *brw, brw_inst *inst) } else { if (brw_inst_src1_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { return src_da16(file, - brw, + devinfo, brw_inst_opcode(devinfo, inst), brw_inst_src1_reg_type(devinfo, inst), brw_inst_src1_reg_file(devinfo, inst), @@ -1179,9 +1171,8 @@ src1(FILE *file, struct brw_context *brw, brw_inst *inst) } static int -qtr_ctrl(FILE *file, struct brw_context *brw, brw_inst *inst) 
+qtr_ctrl(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; int qtr_ctl = brw_inst_qtr_control(devinfo, inst); int exec_size = 1 << brw_inst_exec_size(devinfo, inst); @@ -1211,23 +1202,22 @@ qtr_ctrl(FILE *file, struct brw_context *brw, brw_inst *inst) #ifdef DEBUG static __attribute__((__unused__)) int -brw_disassemble_imm(struct brw_context *brw, +brw_disassemble_imm(const struct brw_device_info *devinfo, uint32_t dw3, uint32_t dw2, uint32_t dw1, uint32_t dw0) { brw_inst inst; inst.data[0] = (((uint64_t) dw1) << 32) | ((uint64_t) dw0); inst.data[1] = (((uint64_t) dw3) << 32) | ((uint64_t) dw2); - return brw_disassemble_inst(stderr, brw, &inst, false); + return brw_disassemble_inst(stderr, devinfo, &inst, false); } #endif int -brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, - bool is_compacted) +brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, + brw_inst *inst, bool is_compacted) { int err = 0; int space = 0; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; const enum opcode opcode = brw_inst_opcode(devinfo, inst); @@ -1268,11 +1258,11 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, * control flow doesn't update flags. */ if (brw_inst_cond_modifier(devinfo, inst) && - (brw->gen < 6 || (opcode != BRW_OPCODE_SEL && + (devinfo->gen < 6 || (opcode != BRW_OPCODE_SEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE))) { format(file, ".f%ld", - brw->gen >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0); + devinfo->gen >= 7 ? 
brw_inst_flag_reg_nr(devinfo, inst) : 0); if (brw_inst_flag_subreg_nr(devinfo, inst)) format(file, ".%ld", brw_inst_flag_subreg_nr(devinfo, inst)); } @@ -1285,66 +1275,66 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, string(file, ")"); } - if (opcode == BRW_OPCODE_SEND && brw->gen < 6) + if (opcode == BRW_OPCODE_SEND && devinfo->gen < 6) format(file, " %ld", brw_inst_base_mrf(devinfo, inst)); - if (has_uip(brw, opcode)) { + if (has_uip(devinfo, opcode)) { /* Instructions that have UIP also have JIP. */ pad(file, 16); format(file, "JIP: %d", brw_inst_jip(devinfo, inst)); pad(file, 32); format(file, "UIP: %d", brw_inst_uip(devinfo, inst)); - } else if (has_jip(brw, opcode)) { + } else if (has_jip(devinfo, opcode)) { pad(file, 16); - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { format(file, "JIP: %d", brw_inst_jip(devinfo, inst)); } else { format(file, "JIP: %d", brw_inst_gen6_jump_count(devinfo, inst)); } - } else if (brw->gen < 6 && (opcode == BRW_OPCODE_BREAK || + } else if (devinfo->gen < 6 && (opcode == BRW_OPCODE_BREAK || opcode == BRW_OPCODE_CONTINUE || opcode == BRW_OPCODE_ELSE)) { pad(file, 16); format(file, "Jump: %d", brw_inst_gen4_jump_count(devinfo, inst)); pad(file, 32); format(file, "Pop: %ld", brw_inst_gen4_pop_count(devinfo, inst)); - } else if (brw->gen < 6 && (opcode == BRW_OPCODE_IF || + } else if (devinfo->gen < 6 && (opcode == BRW_OPCODE_IF || opcode == BRW_OPCODE_IFF || opcode == BRW_OPCODE_HALT)) { pad(file, 16); format(file, "Jump: %d", brw_inst_gen4_jump_count(devinfo, inst)); - } else if (brw->gen < 6 && opcode == BRW_OPCODE_ENDIF) { + } else if (devinfo->gen < 6 && opcode == BRW_OPCODE_ENDIF) { pad(file, 16); format(file, "Pop: %ld", brw_inst_gen4_pop_count(devinfo, inst)); } else if (opcode == BRW_OPCODE_JMPI) { pad(file, 16); - err |= src1(file, brw, inst); + err |= src1(file, devinfo, inst); } else if (opcode_descs[opcode].nsrc == 3) { pad(file, 16); - err |= dest_3src(file, brw, inst); + err |= 
dest_3src(file, devinfo, inst); pad(file, 32); - err |= src0_3src(file, brw, inst); + err |= src0_3src(file, devinfo, inst); pad(file, 48); - err |= src1_3src(file, brw, inst); + err |= src1_3src(file, devinfo, inst); pad(file, 64); - err |= src2_3src(file, brw, inst); + err |= src2_3src(file, devinfo, inst); } else { if (opcode_descs[opcode].ndst > 0) { pad(file, 16); - err |= dest(file, brw, inst); + err |= dest(file, devinfo, inst); } if (opcode_descs[opcode].nsrc > 0) { pad(file, 32); - err |= src0(file, brw, inst); + err |= src0(file, devinfo, inst); } if (opcode_descs[opcode].nsrc > 1) { pad(file, 48); - err |= src1(file, brw, inst); + err |= src1(file, devinfo, inst); } } @@ -1354,7 +1344,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) { /* show the indirect descriptor source */ pad(file, 48); - err |= src1(file, brw, inst); + err |= src1(file, devinfo, inst); } newline(file); @@ -1362,7 +1352,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, space = 0; fprintf(file, " "); - err |= control(file, "SFID", brw->gen >= 6 ? gen6_sfid : gen4_sfid, + err |= control(file, "SFID", devinfo->gen >= 6 ? 
gen6_sfid : gen4_sfid, sfid, &space); @@ -1383,7 +1373,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, brw_inst_math_msg_precision(devinfo, inst), &space); break; case BRW_SFID_SAMPLER: - if (brw->gen >= 5) { + if (devinfo->gen >= 5) { format(file, " (%ld, %ld, %ld, %ld)", brw_inst_binding_table_index(devinfo, inst), brw_inst_sampler(devinfo, inst), @@ -1394,7 +1384,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, brw_inst_binding_table_index(devinfo, inst), brw_inst_sampler(devinfo, inst), brw_inst_sampler_msg_type(devinfo, inst)); - if (!brw->is_g4x) { + if (!devinfo->is_g4x) { err |= control(file, "sampler target format", sampler_target_format, brw_inst_sampler_return_format(devinfo, inst), NULL); @@ -1404,12 +1394,12 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, break; case GEN6_SFID_DATAPORT_SAMPLER_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { format(file, " (%ld, %ld, %ld, %ld)", brw_inst_binding_table_index(devinfo, inst), brw_inst_dp_msg_control(devinfo, inst), brw_inst_dp_msg_type(devinfo, inst), - brw->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst)); + devinfo->gen >= 7 ? 0 : brw_inst_dp_write_commit(devinfo, inst)); } else { format(file, " (%ld, %ld, %ld)", brw_inst_binding_table_index(devinfo, inst), @@ -1423,22 +1413,22 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, unsigned msg_type = brw_inst_dp_write_msg_type(devinfo, inst); err |= control(file, "DP rc message type", - brw->gen >= 6 ? dp_rc_msg_type_gen6 + devinfo->gen >= 6 ? dp_rc_msg_type_gen6 : dp_write_port_msg_type, msg_type, &space); bool is_rt_write = msg_type == - (brw->gen >= 6 ? GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE - : BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE); + (devinfo->gen >= 6 ? 
GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE + : BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE); if (is_rt_write) { err |= control(file, "RT message type", m_rt_write_subtype, brw_inst_rt_message_type(devinfo, inst), &space); - if (brw->gen >= 6 && brw_inst_rt_slot_group(devinfo, inst)) + if (devinfo->gen >= 6 && brw_inst_rt_slot_group(devinfo, inst)) string(file, " Hi"); if (brw_inst_rt_last(devinfo, inst)) string(file, " LastRT"); - if (brw->gen < 7 && brw_inst_dp_write_commit(devinfo, inst)) + if (devinfo->gen < 7 && brw_inst_dp_write_commit(devinfo, inst)) string(file, " WriteCommit"); } else { format(file, " MsgCtrl = 0x%lx", @@ -1453,22 +1443,22 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, format(file, " %ld", brw_inst_urb_global_offset(devinfo, inst)); space = 1; - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { err |= control(file, "urb opcode", gen7_urb_opcode, brw_inst_urb_opcode(devinfo, inst), &space); - } else if (brw->gen >= 5) { + } else if (devinfo->gen >= 5) { err |= control(file, "urb opcode", gen5_urb_opcode, brw_inst_urb_opcode(devinfo, inst), &space); } err |= control(file, "urb swizzle", urb_swizzle, brw_inst_urb_swizzle_control(devinfo, inst), &space); - if (brw->gen < 7) { + if (devinfo->gen < 7) { err |= control(file, "urb allocate", urb_allocate, brw_inst_urb_allocate(devinfo, inst), &space); err |= control(file, "urb used", urb_used, brw_inst_urb_used(devinfo, inst), &space); } - if (brw->gen < 8) { + if (devinfo->gen < 8) { err |= control(file, "urb complete", urb_complete, brw_inst_urb_complete(devinfo, inst), &space); } @@ -1476,7 +1466,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, case BRW_SFID_THREAD_SPAWNER: break; case GEN7_SFID_DATAPORT_DATA_CACHE: - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { format(file, " ("); err |= control(file, "DP DC0 message type", @@ -1499,7 +1489,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, /* 
FALLTHROUGH */ case HSW_SFID_DATAPORT_DATA_CACHE_1: { - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { format(file, " ("); unsigned msg_ctrl = brw_inst_dp_msg_control(devinfo, inst); @@ -1541,7 +1531,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, } case GEN7_SFID_PIXEL_INTERPOLATOR: - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { format(file, " (%s, %s, 0x%02lx)", brw_inst_pi_nopersp(devinfo, inst) ? "linear" : "persp", pixel_interpolator_msg_types[brw_inst_pi_message_type(devinfo, inst)], @@ -1567,7 +1557,7 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, space = 1; err |= control(file, "access mode", access_mode, brw_inst_access_mode(devinfo, inst), &space); - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { err |= control(file, "write enable control", wectrl, brw_inst_mask_control(devinfo, inst), &space); } else { @@ -1578,8 +1568,8 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, ((brw_inst_no_dd_check(devinfo, inst) << 1) | brw_inst_no_dd_clear(devinfo, inst)), &space); - if (brw->gen >= 6) - err |= qtr_ctrl(file, brw, inst); + if (devinfo->gen >= 6) + err |= qtr_ctrl(file, devinfo, inst); else { if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_COMPRESSED && opcode_descs[opcode].ndst > 0 && @@ -1595,10 +1585,10 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, err |= control(file, "compaction", cmpt_ctrl, is_compacted, &space); err |= control(file, "thread control", thread_ctrl, brw_inst_thread_control(devinfo, inst), &space); - if (has_branch_ctrl(brw, opcode)) { + if (has_branch_ctrl(devinfo, opcode)) { err |= control(file, "branch ctrl", branch_ctrl, brw_inst_branch_control(devinfo, inst), &space); - } else if (brw->gen >= 6) { + } else if (devinfo->gen >= 6) { err |= control(file, "acc write control", accwr, brw_inst_acc_wr_control(devinfo, inst), &space); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.c 
b/src/mesa/drivers/dri/i965/brw_eu.c index deeb4fe..7f66382 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -262,7 +262,7 @@ const unsigned *brw_get_program( struct brw_compile *p, } void -brw_disassemble(struct brw_context *brw, +brw_disassemble(const struct brw_device_info *devinfo, void *assembly, int start, int end, FILE *out) { bool dump_hex = false; @@ -270,7 +270,7 @@ brw_disassemble(struct brw_context *brw, for (int offset = start; offset < end;) { brw_inst *insn = assembly + offset; brw_inst uncompacted; - bool compacted = brw_inst_cmpt_control(brw->intelScreen->devinfo, insn); + bool compacted = brw_inst_cmpt_control(devinfo, insn); if (0) fprintf(out, "0x%08x: ", offset); @@ -282,7 +282,7 @@ brw_disassemble(struct brw_context *brw, ((uint32_t *)insn)[0]); } - brw_uncompact_instruction(brw->intelScreen->devinfo, &uncompacted, compacted); + brw_uncompact_instruction(devinfo, &uncompacted, compacted); insn = &uncompacted; offset += 8; } else { @@ -296,6 +296,6 @@ brw_disassemble(struct brw_context *brw, offset += 16; } - brw_disassemble_inst(out, brw, insn, compacted); + brw_disassemble_inst(out, devinfo, insn, compacted); } } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index e65c270..9b51691 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -112,7 +112,7 @@ void brw_set_default_acc_write_control(struct brw_compile *p, unsigned value); void brw_init_compile(struct brw_context *, struct brw_compile *p, void *mem_ctx); -void brw_disassemble(struct brw_context *brw, void *assembly, +void brw_disassemble(const struct brw_device_info *devinfo, void *assembly, int start, int end, FILE *out); const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); @@ -471,7 +471,7 @@ void brw_uncompact_instruction(const struct brw_device_info *devinfo, bool brw_try_compact_instruction(const struct brw_device_info *devinfo, brw_compact_inst 
*dst, brw_inst *src); -void brw_debug_compact_uncompact(struct brw_context *brw, +void brw_debug_compact_uncompact(const struct brw_device_info *devinfo, brw_inst *orig, brw_inst *uncompacted); static inline int diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 428cdf4..56e87ad 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1218,18 +1218,18 @@ brw_uncompact_instruction(const struct brw_device_info *devinfo, brw_inst *dst, } } -void brw_debug_compact_uncompact(struct brw_context *brw, +void brw_debug_compact_uncompact(const struct brw_device_info *devinfo, brw_inst *orig, brw_inst *uncompacted) { fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n", - brw->gen); + devinfo->gen); fprintf(stderr, " before: "); - brw_disassemble_inst(stderr, brw, orig, true); + brw_disassemble_inst(stderr, devinfo, orig, true); fprintf(stderr, " after: "); - brw_disassemble_inst(stderr, brw, uncompacted, false); + brw_disassemble_inst(stderr, devinfo, uncompacted, false); uint32_t *before_bits = (uint32_t *)orig; uint32_t *after_bits = (uint32_t *)uncompacted; @@ -1365,7 +1365,6 @@ void brw_compact_instructions(struct brw_compile *p, int start_offset, int num_annotations, struct annotation *annotation) { - struct brw_context *brw = p->brw; const struct brw_device_info *devinfo = p->devinfo; void *store = p->store + start_offset / 16; /* For an instruction at byte offset 16*i before compaction, this is the @@ -1400,7 +1399,7 @@ brw_compact_instructions(struct brw_compile *p, int start_offset, brw_inst uncompacted; brw_uncompact_instruction(devinfo, &uncompacted, dst); if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) { - brw_debug_compact_uncompact(brw, &saved, &uncompacted); + brw_debug_compact_uncompact(devinfo, &saved, &uncompacted); } } diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index 
e6f837c..a7d061a 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -136,7 +136,8 @@ brw_compile_ff_gs_prog(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_GS)) { fprintf(stderr, "gs:\n"); - brw_disassemble(brw, c.func.store, 0, program_size, stderr); + brw_disassemble(brw->intelScreen->devinfo, c.func.store, + 0, program_size, stderr); fprintf(stderr, "\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index d5395de..47bc454 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -119,7 +119,8 @@ static void compile_sf_prog( struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_SF)) { fprintf(stderr, "sf:\n"); - brw_disassemble(brw, c.func.store, 0, program_size, stderr); + brw_disassemble(brw->intelScreen->devinfo, + c.func.store, 0, program_size, stderr); fprintf(stderr, "\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 8b32810..5cf70eb 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -496,8 +496,8 @@ dump_prog_cache(struct brw_context *brw) } fprintf(stderr, "%s:\n", name); - brw_disassemble(brw, brw->cache.bo->virtual, item->offset, item->size, - stderr); + brw_disassemble(brw->intelScreen->devinfo, brw->cache.bo->virtual, + item->offset, item->size, stderr); } } diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index eed5756..b4a693f 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -79,7 +79,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation fprintf(stderr, " %s\n", last_annotation_string); } - brw_disassemble(brw, assembly, start_offset, end_offset, stderr); + brw_disassemble(brw->intelScreen->devinfo, assembly, start_offset, 
end_offset, stderr); if (annotation[i].block_end) { fprintf(stderr, " END B%d", annotation[i].block_end->num); diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c index 3990aa3..c80cc89 100644 --- a/src/mesa/drivers/dri/i965/test_eu_compact.c +++ b/src/mesa/drivers/dri/i965/test_eu_compact.c @@ -31,8 +31,6 @@ static bool test_compact_instruction(struct brw_compile *p, brw_inst src) { - struct brw_context *brw = p->brw; - brw_compact_inst dst; memset(&dst, 0xd0, sizeof(dst)); @@ -41,7 +39,7 @@ test_compact_instruction(struct brw_compile *p, brw_inst src) brw_uncompact_instruction(p->devinfo, &uncompacted, &dst); if (memcmp(&uncompacted, &src, sizeof(src))) { - brw_debug_compact_uncompact(brw, &src, &uncompacted); + brw_debug_compact_uncompact(p->devinfo, &src, &uncompacted); return false; } } else { @@ -51,7 +49,7 @@ test_compact_instruction(struct brw_compile *p, brw_inst src) if (memcmp(&unchanged, &dst, sizeof(dst))) { fprintf(stderr, "Failed to compact, but dst changed\n"); fprintf(stderr, " Instruction: "); - brw_disassemble_inst(stderr, brw, &src, false); + brw_disassemble_inst(stderr, p->devinfo, &src, false); return false; } } From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Remove the context field from brw_compiler Message-ID: <20150422230135.B2EBE76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 61c4702489fa1694892c5ce90ccf65a5094df3e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61c4702489fa1694892c5ce90ccf65a5094df3e7 Author: Jason Ekstrand Date: Wed Apr 15 14:13:58 2015 -0700 i965: Remove the context field from brw_compiler Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_clip.c | 2 +- src/mesa/drivers/dri/i965/brw_clip_line.c | 13 ++++++------- src/mesa/drivers/dri/i965/brw_clip_tri.c | 4 ++-- 
src/mesa/drivers/dri/i965/brw_clip_util.c | 12 ++++-------- src/mesa/drivers/dri/i965/brw_eu.c | 18 +++++++----------- src/mesa/drivers/dri/i965/brw_eu.h | 5 ++--- src/mesa/drivers/dri/i965/brw_eu_compact.c | 4 ++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 8 ++++---- src/mesa/drivers/dri/i965/brw_ff_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_ff_gs_emit.c | 12 +++--------- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +- src/mesa/drivers/dri/i965/brw_sf.c | 2 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 6 ++---- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- src/mesa/drivers/dri/i965/test_eu_compact.c | 13 +++++-------- 15 files changed, 42 insertions(+), 63 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 07b10a2..3aa79b5 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -62,7 +62,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Begin the compilation: */ - brw_init_compile(brw, &c.func, mem_ctx); + brw_init_compile(brw->intelScreen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 070adba..395cd2f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -45,7 +45,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { - struct brw_context *brw = c->func.brw; + const struct brw_device_info *devinfo = c->func.devinfo; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -89,7 +89,7 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W); i++; - if (brw->gen == 5) { + if (devinfo->gen == 5) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } @@ -129,7 +129,6 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) 
static void clip_and_emit_line( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; struct brw_indirect vtx0 = brw_indirect(0, 0); struct brw_indirect vtx1 = brw_indirect(1, 0); struct brw_indirect newvtx0 = brw_indirect(2, 0); @@ -155,7 +154,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (brw->has_negative_rhw_bug) { + if (p->devinfo->has_negative_rhw_bug) { brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); @@ -213,7 +212,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (brw->has_negative_rhw_bug) { + if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); { @@ -239,7 +238,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (brw->has_negative_rhw_bug) { + if (p->devinfo->has_negative_rhw_bug) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); brw_IF(p, BRW_EXECUTE_1); } @@ -255,7 +254,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) BRW_PREDICATE_NORMAL); } - if (brw->has_negative_rhw_bug) { + if (p->devinfo->has_negative_rhw_bug) { brw_ENDIF(p); } } diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index d4babc9..ad5e588 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -50,7 +50,7 @@ static void release_tmps( struct brw_clip_compile *c ) void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, GLuint nr_verts ) { - struct brw_context *brw = c->func.brw; + const struct brw_device_info *devinfo = 
c->func.devinfo; GLuint i = 0,j; /* Register usage is static, precompute here: @@ -123,7 +123,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W); i++; - if (brw->gen == 5) { + if (devinfo->gen == 5) { c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; } diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index ffd01dd..af2029b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -417,7 +417,6 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg incoming = get_element_ud(c->reg.R0, 2); - struct brw_context *brw = p->brw; /* Shift so that lowest outcode bit is rightmost: */ @@ -429,7 +428,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) /* Rearrange userclip outcodes so that they come directly after * the fixed plane bits. 
*/ - if (brw->gen == 5 || brw->is_g4x) + if (p->devinfo->gen == 5 || p->devinfo->is_g4x) brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14)); else brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); @@ -444,9 +443,8 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) void brw_clip_ff_sync(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - if (brw->gen == 5) { + if (p->devinfo->gen == 5) { brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); brw_IF(p, BRW_EXECUTE_1); @@ -467,11 +465,9 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) void brw_clip_init_ff_sync(struct brw_clip_compile *c) { - struct brw_context *brw = c->func.brw; - - if (brw->gen == 5) { - struct brw_compile *p = &c->func; + struct brw_compile *p = &c->func; + if (p->devinfo->gen == 5) { brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); } } diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 7f66382..ad0c2fb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -128,7 +128,7 @@ void brw_set_default_predicate_inverse(struct brw_compile *p, bool predicate_inv void brw_set_default_flag_reg(struct brw_compile *p, int reg, int subreg) { - if (p->brw->gen >= 7) + if (p->devinfo->gen >= 7) brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg); brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg); @@ -143,11 +143,9 @@ void brw_set_default_compression_control(struct brw_compile *p, enum brw_compression compression_control) { - struct brw_context *brw = p->brw; - p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); - if (brw->gen >= 6) { + if (p->devinfo->gen >= 6) { /* Since we don't use the SIMD32 support in gen6, we translate * the pre-gen6 compression control here. 
*/ @@ -188,9 +186,7 @@ void brw_set_default_saturate( struct brw_compile *p, bool enable ) void brw_set_default_acc_write_control(struct brw_compile *p, unsigned value) { - struct brw_context *brw = p->brw; - - if (brw->gen >= 6) + if (p->devinfo->gen >= 6) brw_inst_set_acc_wr_control(p->devinfo, p->current, value); } @@ -213,12 +209,12 @@ void brw_pop_insn_state( struct brw_compile *p ) /*********************************************************************** */ void -brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) +brw_init_compile(const struct brw_device_info *devinfo, + struct brw_compile *p, void *mem_ctx) { memset(p, 0, sizeof(*p)); - p->brw = brw; - p->devinfo = brw->intelScreen->devinfo; + p->devinfo = devinfo; /* * Set the initial instruction store array size to 1024, if found that * isn't enough, then it will double the store size at brw_next_insn() @@ -250,7 +246,7 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx) p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size); - brw_init_compaction_tables(brw); + brw_init_compaction_tables(devinfo); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9b51691..86240e8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -69,7 +69,6 @@ struct brw_compile { bool single_program_flow; bool compressed; - struct brw_context *brw; const struct brw_device_info *devinfo; /* Control flow stacks: @@ -110,7 +109,7 @@ void brw_set_default_predicate_inverse(struct brw_compile *p, bool predicate_inv void brw_set_default_flag_reg(struct brw_compile *p, int reg, int subreg); void brw_set_default_acc_write_control(struct brw_compile *p, unsigned value); -void brw_init_compile(struct brw_context *, struct brw_compile *p, +void brw_init_compile(const struct brw_device_info *, struct brw_compile *p, 
void *mem_ctx); void brw_disassemble(const struct brw_device_info *devinfo, void *assembly, int start, int end, FILE *out); @@ -463,7 +462,7 @@ enum brw_conditional_mod brw_negate_cmod(uint32_t cmod); enum brw_conditional_mod brw_swap_cmod(uint32_t cmod); /* brw_eu_compact.c */ -void brw_init_compaction_tables(struct brw_context *brw); +void brw_init_compaction_tables(const struct brw_device_info *devinfo); void brw_compact_instructions(struct brw_compile *p, int start_offset, int num_annotations, struct annotation *annotation); void brw_uncompact_instruction(const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 56e87ad..6085897 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1306,7 +1306,7 @@ update_gen4_jump_count(const struct brw_device_info *devinfo, brw_inst *insn, } void -brw_init_compaction_tables(struct brw_context *brw) +brw_init_compaction_tables(const struct brw_device_info *devinfo) { static bool initialized; if (initialized || p_atomic_cmpxchg(&initialized, false, true) != false) @@ -1329,7 +1329,7 @@ brw_init_compaction_tables(struct brw_context *brw) assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0); assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0); - switch (brw->gen) { + switch (devinfo->gen) { case 9: case 8: control_index_table = gen8_control_index_table; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index d11c220..2ffd205 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -48,8 +48,8 @@ gen6_resolve_implied_move(struct brw_compile *p, struct brw_reg *src, unsigned msg_reg_nr) { - struct brw_context *brw = p->brw; - if (brw->gen < 6) + const struct brw_device_info *devinfo = p->devinfo; + if (devinfo->gen < 6) return; if (src->file == 
BRW_MESSAGE_REGISTER_FILE) @@ -78,8 +78,8 @@ gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) * Since we're pretending to have 16 MRFs anyway, we may as well use the * registers required for messages with EOT. */ - struct brw_context *brw = p->brw; - if (brw->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + const struct brw_device_info *devinfo = p->devinfo; + if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { reg->file = BRW_GENERAL_REGISTER_FILE; reg->nr += GEN7_MRF_HACK_START; } diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c index a7d061a..996e560 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c @@ -64,7 +64,7 @@ brw_compile_ff_gs_prog(struct brw_context *brw, /* Begin the compilation: */ - brw_init_compile(brw, &c.func, mem_ctx); + brw_init_compile(brw->intelScreen->devinfo, &c.func, mem_ctx); c.func.single_program_flow = 1; diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c index e7e8e02..314443c 100644 --- a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c @@ -244,14 +244,12 @@ static void brw_ff_gs_ff_sync(struct brw_ff_gs_compile *c, int num_prim) void brw_ff_gs_quads(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key) { - struct brw_context *brw = c->func.brw; - brw_ff_gs_alloc_regs(c, 4, false); brw_ff_gs_initialize_header(c); /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ - if (brw->gen == 5) + if (c->func.devinfo->gen == 5) brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2( c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) @@ -284,12 +282,10 @@ void brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key) { - struct brw_context *brw = c->func.brw; - brw_ff_gs_alloc_regs(c, 4, false); brw_ff_gs_initialize_header(c); - if (brw->gen == 5) + if (c->func.devinfo->gen == 5) brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2( c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) @@ -320,12 +316,10 @@ brw_ff_gs_quad_strip(struct brw_ff_gs_compile *c, void brw_ff_gs_lines(struct brw_ff_gs_compile *c) { - struct brw_context *brw = c->func.brw; - brw_ff_gs_alloc_regs(c, 2, false); brw_ff_gs_initialize_header(c); - if (brw->gen == 5) + if (c->func.devinfo->gen == 5) brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2( c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 6b4362b..3e103b2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -139,7 +139,7 @@ fs_generator::fs_generator(struct brw_context *brw, ctx = &brw->ctx; p = rzalloc(mem_ctx, struct brw_compile); - brw_init_compile(brw, p, mem_ctx); + brw_init_compile(brw->intelScreen->devinfo, p, mem_ctx); } fs_generator::~fs_generator() diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 47bc454..f056d8c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -60,7 +60,7 @@ static void compile_sf_prog( struct brw_context *brw, mem_ctx = ralloc_context(NULL); /* Begin the compilation: */ - brw_init_compile(brw, &c.func, mem_ctx); + brw_init_compile(brw->intelScreen->devinfo, &c.func, mem_ctx); c.key = *key; c.vue_map = 
brw->vue_map_geom_out; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index a16303b..31ec69c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -192,7 +192,6 @@ static int count_flatshaded_attributes(struct brw_sf_compile *c) static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; GLuint nr; GLuint jmpi = 1; @@ -201,7 +200,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (brw->gen == 5) + if (p->devinfo->gen == 5) jmpi = 2; nr = count_flatshaded_attributes(c); @@ -225,7 +224,6 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; GLuint nr; GLuint jmpi = 1; @@ -234,7 +232,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; - if (brw->gen == 5) + if (p->devinfo->gen == 5) jmpi = 2; nr = count_flatshaded_attributes(c); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 427f917..94ab32d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -147,7 +147,7 @@ vec4_generator::vec4_generator(struct brw_context *brw, debug_flag(debug_flag) { p = rzalloc(mem_ctx, struct brw_compile); - brw_init_compile(brw, p, mem_ctx); + brw_init_compile(brw->intelScreen->devinfo, p, mem_ctx); } vec4_generator::~vec4_generator() diff --git a/src/mesa/drivers/dri/i965/test_eu_compact.c b/src/mesa/drivers/dri/i965/test_eu_compact.c index c80cc89..0343162 100644 --- a/src/mesa/drivers/dri/i965/test_eu_compact.c +++ b/src/mesa/drivers/dri/i965/test_eu_compact.c @@ -250,14 +250,14 @@ struct { }; static bool 
-run_tests(struct brw_context *brw) +run_tests(const struct brw_device_info *devinfo) { bool fail = false; for (int i = 0; i < ARRAY_SIZE(tests); i++) { for (int align_16 = 0; align_16 <= 1; align_16++) { struct brw_compile *p = rzalloc(NULL, struct brw_compile); - brw_init_compile(brw, p, p); + brw_init_compile(devinfo, p, p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); if (align_16) @@ -288,15 +288,12 @@ run_tests(struct brw_context *brw) int main(int argc, char **argv) { - struct brw_context *brw = calloc(1, sizeof(*brw)); struct brw_device_info *devinfo = calloc(1, sizeof(*devinfo)); - brw->intelScreen = calloc(1, sizeof(*brw->intelScreen)); - brw->intelScreen->devinfo = devinfo; - brw->gen = devinfo->gen = 6; + devinfo->gen = 6; bool fail = false; - for (brw->gen = 6; brw->gen <= 7; brw->gen++) { - fail |= run_tests(brw); + for (devinfo->gen = 6; devinfo->gen <= 7; devinfo->gen++) { + fail |= run_tests(devinfo); } return fail; From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965/fs: Remove the GL context from the generator Message-ID: <20150422230135.BED8476102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5cb91db619e8689c8fd45a67b57b96fc36f4ca9b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5cb91db619e8689c8fd45a67b57b96fc36f4ca9b Author: Jason Ekstrand Date: Wed Apr 15 14:51:18 2015 -0700 i965/fs: Remove the GL context from the generator Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 1 - src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 +---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 24ca43c..c429070 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -639,7 +639,6 @@ private: bool patch_discard_jumps_to_fb_writes(); 
struct brw_context *brw; - struct gl_context *ctx; struct brw_compile *p; const void * const key; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 3e103b2..539619a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -136,8 +136,6 @@ fs_generator::fs_generator(struct brw_context *brw, runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), stage_abbrev(stage_abbrev), mem_ctx(mem_ctx) { - ctx = &brw->ctx; - p = rzalloc(mem_ctx, struct brw_compile); brw_init_compile(brw->intelScreen->devinfo, p, mem_ctx); } @@ -2091,14 +2089,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; default: - if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { - _mesa_problem(ctx, "Unsupported opcode `%s' in %s", - opcode_descs[inst->opcode].name, stage_abbrev); - } else { - _mesa_problem(ctx, "Unsupported opcode %d in %s", inst->opcode, - stage_abbrev); - } - abort(); + unreachable("Unsupported opcode"); case SHADER_OPCODE_LOAD_PAYLOAD: unreachable("Should be lowered by lower_load_payload()"); From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965/device_info: Add a HSW_FEATURES macro Message-ID: <20150422230135.D5F6D76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 85db2aca52acd7f05e1d8cb3300c64f0307e39a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=85db2aca52acd7f05e1d8cb3300c64f0307e39a9 Author: Jason Ekstrand Date: Thu Apr 16 17:50:43 2015 -0700 i965/device_info: Add a HSW_FEATURES macro It's basically just a copy of GEN7_FEATURES only with is_haswell set Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_device_info.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c index c4350b4..bdbe78f 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -164,8 +164,12 @@ static const struct brw_device_info brw_device_info_byt = { }, }; +#define HSW_FEATURES \ + GEN7_FEATURES, \ + .is_haswell = true + static const struct brw_device_info brw_device_info_hsw_gt1 = { - GEN7_FEATURES, .is_haswell = true, .gt = 1, + HSW_FEATURES, .gt = 1, .max_vs_threads = 70, .max_hs_threads = 70, .max_ds_threads = 70, @@ -182,7 +186,7 @@ static const struct brw_device_info brw_device_info_hsw_gt1 = { }; static const struct brw_device_info brw_device_info_hsw_gt2 = { - GEN7_FEATURES, .is_haswell = true, .gt = 2, + HSW_FEATURES, .gt = 2, .max_vs_threads = 280, .max_hs_threads = 256, .max_ds_threads = 280, @@ -199,7 +203,7 @@ static const struct brw_device_info brw_device_info_hsw_gt2 = { }; static const struct brw_device_info brw_device_info_hsw_gt3 = { - GEN7_FEATURES, .is_haswell = true, .gt = 3, + HSW_FEATURES, .gt = 3, .max_vs_threads = 280, .max_hs_threads = 256, .max_ds_threads = 280, From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965/fs: Add a devinfo field to the generator and use it for gen checks Message-ID: <20150422230135.F075876102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5bda1ff1bec0e01300a4e8e3da38168958df9ebc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5bda1ff1bec0e01300a4e8e3da38168958df9ebc Author: Jason Ekstrand Date: Tue Apr 14 17:45:40 2015 -0700 i965/fs: Add a devinfo field to the generator and use it for gen checks Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 116 ++++++++++++------------ 2 files changed, 58 insertions(+), 59 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h index c429070..1d3f83d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -639,6 +639,7 @@ private: bool patch_discard_jumps_to_fb_writes(); struct brw_context *brw; + const struct brw_device_info *devinfo; struct brw_compile *p; const void * const key; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1ef2d1d..ce5ad83 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -130,14 +130,14 @@ fs_generator::fs_generator(struct brw_context *brw, bool runtime_check_aads_emit, const char *stage_abbrev) - : brw(brw), key(key), + : brw(brw), devinfo(brw->intelScreen->devinfo), key(key), prog_data(prog_data), prog(prog), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), stage_abbrev(stage_abbrev), mem_ctx(mem_ctx) { p = rzalloc(mem_ctx, struct brw_compile); - brw_init_compile(brw->intelScreen->devinfo, p, mem_ctx); + brw_init_compile(devinfo, p, mem_ctx); } fs_generator::~fs_generator() @@ -159,7 +159,7 @@ public: bool fs_generator::patch_discard_jumps_to_fb_writes() { - if (brw->gen < 6 || this->discard_halt_patches.is_empty()) + if (devinfo->gen < 6 || this->discard_halt_patches.is_empty()) return false; int scale = brw_jump_scale(p->devinfo); @@ -203,7 +203,7 @@ fs_generator::fire_fb_write(fs_inst *inst, brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - if (brw->gen < 6) { + if (devinfo->gen < 6) { brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -254,7 +254,7 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key; struct brw_reg implied_header; - if (brw->gen < 8 && !brw->is_haswell) { + if (devinfo->gen < 8 && 
!devinfo->is_haswell) { brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); } @@ -277,7 +277,7 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) if (prog_data->uses_kill) { struct brw_reg pixel_mask; - if (brw->gen >= 6) + if (devinfo->gen >= 6) pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); else pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -285,7 +285,7 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) brw_MOV(p, pixel_mask, brw_flag_reg(0, 1)); } - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_16); brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); @@ -325,7 +325,7 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) fire_fb_write(inst, payload, implied_header, inst->mlen); } else { /* This can only happen in gen < 6 */ - assert(brw->gen < 6); + assert(devinfo->gen < 6); struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -413,8 +413,8 @@ fs_generator::generate_linterp(fs_inst *inst, struct brw_reg delta_y = offset(src[0], dispatch_width / 8); struct brw_reg interp = src[1]; - if (brw->has_pln && - (brw->gen >= 7 || (delta_x.nr & 1) == 0)) { + if (devinfo->has_pln && + (devinfo->gen >= 7 || (delta_x.nr & 1) == 0)) { brw_PLN(p, dst, interp, delta_x); } else { brw_LINE(p, brw_null_reg(), interp, delta_x); @@ -531,7 +531,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src unreachable("Invalid width for texture instruction"); } - if (brw->gen >= 5) { + if (devinfo->gen >= 5) { switch (inst->opcode) { case SHADER_OPCODE_TEX: if (inst->shadow_compare) { @@ -560,7 +560,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). 
*/ - assert(brw->gen >= 8 || brw->is_haswell); + assert(devinfo->gen >= 8 || devinfo->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; @@ -570,17 +570,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: - if (brw->gen >= 7) + if (devinfo->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_UMS: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS; break; case SHADER_OPCODE_TXF_MCS: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_LOD: @@ -588,15 +588,15 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; case SHADER_OPCODE_TG4: if (inst->shadow_compare) { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C; } else { - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4; } break; case SHADER_OPCODE_TG4_OFFSET: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); if (inst->shadow_compare) { msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C; } else { @@ -679,11 +679,11 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src } if (is_combined_send) { - assert(brw->gen >= 9 || brw->is_cherryview); + assert(devinfo->gen >= 9 || devinfo->is_cherryview); rlen = 0; } - assert(brw->gen < 7 || !inst->header_present || + assert(devinfo->gen < 7 || !inst->header_present || src.file == BRW_GENERAL_REGISTER_FILE); assert(sampler_index.type == BRW_REGISTER_TYPE_UD); @@ -693,13 +693,13 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src * Otherwise, we can use an implied move from g0 to the first 
message reg. */ if (inst->header_present) { - if (brw->gen < 6 && !inst->offset) { + if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); } else { struct brw_reg header_reg; - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { header_reg = src; } else { assert(inst->base_mrf != -1); @@ -890,7 +890,7 @@ fs_generator::generate_ddy(enum opcode opcode, */ bool unroll_to_simd8 = (dispatch_width == 16 && - (brw->gen == 4 || (brw->gen == 7 && !brw->is_haswell))); + (devinfo->gen == 4 || (devinfo->gen == 7 && !devinfo->is_haswell))); /* produce accurate derivatives */ struct brw_reg src0 = brw_reg(src.file, src.nr, 0, @@ -953,7 +953,7 @@ fs_generator::generate_ddy(enum opcode opcode, void fs_generator::generate_discard_jump(fs_inst *inst) { - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); /* This HALT will be patched up at FB write time to point UIP at the end of * the program, and at brw_uip_jip() JIP will be set to the end of the @@ -1039,7 +1039,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, bool header_present = false; int mlen = 1; - if (brw->gen >= 9) { + if (devinfo->gen >= 9) { /* Skylake requires a message header in order to use SIMD4x2 mode. */ src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD); mlen = 2; @@ -1122,7 +1122,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { - assert(brw->gen < 7); /* Should use the gen7 variant. */ + assert(devinfo->gen < 7); /* Should use the gen7 variant. 
*/ assert(inst->header_present); assert(inst->mlen); @@ -1139,7 +1139,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, rlen = 4; } - if (brw->gen >= 5) + if (devinfo->gen >= 5) msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; else { /* We always use the SIMD16 message so that we only have to load U, and @@ -1163,7 +1163,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, brw_inst_set_qtr_control(p->devinfo, send, BRW_COMPRESSION_NONE); brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); brw_set_src0(p, send, header); - if (brw->gen < 6) + if (devinfo->gen < 6) brw_inst_set_base_mrf(p->devinfo, send, inst->base_mrf); /* Our surface is set up as floats, regardless of what actual data is @@ -1189,7 +1189,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* Varying-offset pull constant loads are treated as a normal expression on * gen7, so the fact that it's a send message is hidden at the IR level. 
*/ @@ -1276,7 +1276,7 @@ fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst) struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg); struct brw_reg dispatch_mask; - if (brw->gen >= 6) + if (devinfo->gen >= 6) dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); else dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1395,7 +1395,7 @@ fs_generator::generate_pack_half_2x16_split(fs_inst *inst, struct brw_reg x, struct brw_reg y) { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); assert(dst.type == BRW_REGISTER_TYPE_UD); assert(x.type == BRW_REGISTER_TYPE_F); assert(y.type == BRW_REGISTER_TYPE_F); @@ -1433,7 +1433,7 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, struct brw_reg dst, struct brw_reg src) { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); assert(dst.type == BRW_REGISTER_TYPE_F); assert(src.type == BRW_REGISTER_TYPE_UD); @@ -1463,7 +1463,7 @@ fs_generator::generate_shader_time_add(fs_inst *inst, struct brw_reg offset, struct brw_reg value) { - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_push_insn_state(p); brw_set_default_mask_control(p, true); @@ -1537,8 +1537,6 @@ fs_generator::enable_debug(const char *shader_name) int fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) { - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; - /* align to 64 byte boundary. 
*/ while (p->next_insn_offset % 64) brw_NOP(p); @@ -1632,7 +1630,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_MAD: - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); @@ -1654,7 +1652,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_LRP: - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); @@ -1710,11 +1708,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_CMP: @@ -1729,7 +1727,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) * coissuing would affect CMP instructions not otherwise affected by * the errata. */ - if (dispatch_width == 16 && brw->gen == 7 && !brw->is_haswell) { + if (dispatch_width == 16 && devinfo->gen == 7 && !devinfo->is_haswell) { if (dst.file == BRW_GENERAL_REGISTER_FILE) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); @@ -1760,32 +1758,32 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFREV: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* BFREV only supports UD type for src and dst. 
*/ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: @@ -1793,7 +1791,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_BFE: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_set_default_access_mode(p, BRW_ALIGN_16); if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); @@ -1809,13 +1807,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case BRW_OPCODE_BFI1: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we * should * * "Force BFI instructions to be executed always in SIMD8." 
*/ - if (dispatch_width == 16 && brw->is_haswell) { + if (dispatch_width == 16 && devinfo->is_haswell) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFI1(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1])); @@ -1827,7 +1825,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } break; case BRW_OPCODE_BFI2: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_set_default_access_mode(p, BRW_ALIGN_16); /* The Haswell WaForceSIMD8ForBFIInstruction workaround says that we * should @@ -1838,7 +1836,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) * do for the other three-source instructions. */ if (dispatch_width == 16 && - (brw->is_haswell || !devinfo->supports_simd16_3src)) { + (devinfo->is_haswell || !devinfo->supports_simd16_3src)) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); @@ -1854,7 +1852,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ - assert(brw->gen == 6); + assert(devinfo->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_IF(p, dispatch_width == 16 ? 
BRW_EXECUTE_16 : BRW_EXECUTE_8); @@ -1893,14 +1891,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: - assert(brw->gen < 6 || inst->mlen == 0); + assert(devinfo->gen < 6 || inst->mlen == 0); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); - } else if (brw->gen == 6) { + } else if (devinfo->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); - } else if (brw->gen == 5 || brw->is_g4x) { + } else if (devinfo->gen == 5 || devinfo->is_g4x) { generate_math_g45(inst, dst, src[0]); } else { generate_math_gen4(inst, dst, src[0]); @@ -1909,11 +1907,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_POW: - assert(brw->gen < 6 || inst->mlen == 0); + assert(devinfo->gen < 6 || inst->mlen == 0); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (brw->gen >= 7 && inst->opcode == SHADER_OPCODE_POW) { + if (devinfo->gen >= 7 && inst->opcode == SHADER_OPCODE_POW) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); - } else if (brw->gen >= 6) { + } else if (devinfo->gen >= 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math_gen4(inst, dst, src[0]); From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Use device_info instead of the context in instruction scheduling Message-ID: <20150422230136.391B776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 02ccb19495c5804f4ab798c00eeb786ee77aaacc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=02ccb19495c5804f4ab798c00eeb786ee77aaacc Author: Jason Ekstrand Date: Fri Apr 17 12:15:58 2015 
-0700 i965: Use device_info instead of the context in instruction scheduling Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_schedule_instructions.cpp | 21 ++++++++++---------- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_shader.h | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 56f69ea..a8bd93d 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -602,7 +602,7 @@ vec4_instruction_scheduler::get_register_pressure_benefit(backend_instruction *b schedule_node::schedule_node(backend_instruction *inst, instruction_scheduler *sched) { - struct brw_context *brw = sched->bv->brw; + const struct brw_device_info *devinfo = sched->bv->devinfo; this->inst = inst; this->child_array_size = 0; @@ -619,8 +619,8 @@ schedule_node::schedule_node(backend_instruction *inst, */ if (!sched->post_reg_alloc) this->latency = 1; - else if (brw->gen >= 6) - set_latency_gen7(brw->is_haswell); + else if (devinfo->gen >= 6) + set_latency_gen7(devinfo->is_haswell); else set_latency_gen4(); } @@ -896,7 +896,7 @@ fs_instruction_scheduler::calculate_deps() last_conditional_mod[inst->flag_subreg] = n; } - if (inst->writes_accumulator_implicitly(v->brw) && + if (inst->writes_accumulator_implicitly(v->devinfo) && !inst->dst.is_accumulator()) { add_dep(last_accumulator_write, n); last_accumulator_write = n; @@ -1021,7 +1021,7 @@ fs_instruction_scheduler::calculate_deps() last_conditional_mod[inst->flag_subreg] = n; } - if (inst->writes_accumulator_implicitly(v->brw)) { + if (inst->writes_accumulator_implicitly(v->devinfo)) { last_accumulator_write = n; } } @@ -1136,7 +1136,7 @@ vec4_instruction_scheduler::calculate_deps() last_conditional_mod = n; } - if (inst->writes_accumulator_implicitly(v->brw) && + if 
(inst->writes_accumulator_implicitly(v->devinfo) && !inst->dst.is_accumulator()) { add_dep(last_accumulator_write, n); last_accumulator_write = n; @@ -1226,7 +1226,7 @@ vec4_instruction_scheduler::calculate_deps() last_conditional_mod = n; } - if (inst->writes_accumulator_implicitly(v->brw)) { + if (inst->writes_accumulator_implicitly(v->devinfo)) { last_accumulator_write = n; } } @@ -1235,7 +1235,6 @@ vec4_instruction_scheduler::calculate_deps() schedule_node * fs_instruction_scheduler::choose_instruction_to_schedule() { - struct brw_context *brw = v->brw; schedule_node *chosen = NULL; if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) { @@ -1303,7 +1302,7 @@ fs_instruction_scheduler::choose_instruction_to_schedule() * then the MRFs for the next SEND, then the next SEND, then the * MRFs, etc., without ever consuming the results of a send. */ - if (brw->gen < 7) { + if (v->devinfo->gen < 7) { fs_inst *chosen_inst = (fs_inst *)chosen->inst; /* We use regs_written > 1 as our test for the kind of send @@ -1381,7 +1380,7 @@ vec4_instruction_scheduler::issue_time(backend_instruction *inst) void instruction_scheduler::schedule_instructions(bblock_t *block) { - struct brw_context *brw = bv->brw; + const struct brw_device_info *devinfo = bv->devinfo; backend_instruction *inst = block->end(); time = 0; @@ -1451,7 +1450,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block) * the next math instruction isn't going to make progress until the first * is done. 
*/ - if (brw->gen < 6 && chosen->inst->is_math()) { + if (devinfo->gen < 6 && chosen->inst->is_math()) { foreach_in_list(schedule_node, n, &instructions) { if (n->inst->is_math()) n->unblocked_time = MAX2(n->unblocked_time, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index d90eac1..2e34057 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -999,10 +999,10 @@ backend_instruction::reads_accumulator_implicitly() const } bool -backend_instruction::writes_accumulator_implicitly(struct brw_context *brw) const +backend_instruction::writes_accumulator_implicitly(const struct brw_device_info *devinfo) const { return writes_accumulator || - (brw->gen < 6 && + (devinfo->gen < 6 && ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && opcode != FS_OPCODE_CINTERP))); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index b80e740..72e8be5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -100,7 +100,7 @@ struct backend_instruction : public exec_node { bool can_do_saturate() const; bool can_do_cmod() const; bool reads_accumulator_implicitly() const; - bool writes_accumulator_implicitly(struct brw_context *brw) const; + bool writes_accumulator_implicitly(const struct brw_device_info *devinfo) const; void remove(bblock_t *block); void insert_after(bblock_t *block, backend_instruction *inst); From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Use device_info instead of the context for computing vue maps Message-ID: <20150422230136.4636376102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: cfc56fcee36912d5fb41262c71463292a737160e URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cfc56fcee36912d5fb41262c71463292a737160e Author: Jason Ekstrand Date: Fri Apr 17 12:52:00 2015 -0700 i965: Use device_info instead of the context for computing vue maps Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_gs.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_vs.c | 8 +++++--- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3707fd0..5724932 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -507,7 +507,8 @@ static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); } -void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map, +void brw_compute_vue_map(const struct brw_device_info *devinfo, + struct brw_vue_map *vue_map, GLbitfield64 slots_valid); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5f83470..2725270 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1793,7 +1793,7 @@ fs_visitor::calculate_urb_setup() * (geometry or vertex shader). 
*/ struct brw_vue_map prev_stage_vue_map; - brw_compute_vue_map(brw, &prev_stage_vue_map, + brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid); int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET; assert(prev_stage_vue_map.num_slots <= first_slot + 32); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index bea90d8..74be9c4 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -126,7 +126,8 @@ brw_compile_gs_prog(struct brw_context *brw, outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } - brw_compute_vue_map(brw, &c.prog_data.base.vue_map, outputs_written); + brw_compute_vue_map(brw->intelScreen->devinfo, + &c.prog_data.base.vue_map, outputs_written); /* Compute the output vertex size. * @@ -248,7 +249,8 @@ brw_compile_gs_prog(struct brw_context *brw, c.prog_data.output_topology = get_hw_prim_for_gl_prim(gp->program.OutputType); - brw_compute_vue_map(brw, &c.input_vue_map, c.key.input_varyings); + brw_compute_vue_map(brw->intelScreen->devinfo, + &c.input_vue_map, c.key.input_varyings); /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index dabff43..2ff1a67 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -54,7 +54,8 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map, * Compute the VUE map for vertex shader program. */ void -brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map, +brw_compute_vue_map(const struct brw_device_info *devinfo, + struct brw_vue_map *vue_map, GLbitfield64 slots_valid) { vue_map->slots_valid = slots_valid; @@ -82,7 +83,7 @@ brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map, /* VUE header: format depends on chip generation and whether clipping is * enabled. 
*/ - if (brw->gen < 6) { + if (devinfo->gen < 6) { /* There are 8 dwords in VUE header pre-Ironlake: * dword 0-3 is indices, point width, clip flags. * dword 4-7 is ndc position @@ -279,7 +280,8 @@ brw_compile_vs_prog(struct brw_context *brw, outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); } - brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written); + brw_compute_vue_map(brw->intelScreen->devinfo, + &prog_data.base.vue_map, outputs_written); if (0) { _mesa_fprint_program_opt(stderr, &c.vp->program.Base, PROG_PRINT_DEBUG, From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Rename brw_compile to brw_codegen Message-ID: <20150422230136.5DC3D76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a85c4c9b3f75cac9ab133caa91a40eec2e4816ae URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a85c4c9b3f75cac9ab133caa91a40eec2e4816ae Author: Jason Ekstrand Date: Thu Apr 16 11:06:57 2015 -0700 i965: Rename brw_compile to brw_codegen This name better matches what it's actually used for. 
The patch was generated with the following command: for file in *; do sed -i -e s/brw_compile/brw_codegen/g $file done Signed-off-by: Jason Ekstrand Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_clip.c | 2 +- src/mesa/drivers/dri/i965/brw_clip.h | 2 +- src/mesa/drivers/dri/i965/brw_clip_line.c | 2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c | 16 +-- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 22 ++-- src/mesa/drivers/dri/i965/brw_clip_util.c | 20 +-- src/mesa/drivers/dri/i965/brw_eu.c | 28 ++--- src/mesa/drivers/dri/i965/brw_eu.h | 128 +++++++++---------- src/mesa/drivers/dri/i965/brw_eu_compact.c | 2 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 144 +++++++++++----------- src/mesa/drivers/dri/i965/brw_eu_util.c | 10 +- src/mesa/drivers/dri/i965/brw_ff_gs.c | 6 +- src/mesa/drivers/dri/i965/brw_ff_gs.h | 4 +- src/mesa/drivers/dri/i965/brw_ff_gs_emit.c | 14 +-- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 +- src/mesa/drivers/dri/i965/brw_gs.c | 6 +- src/mesa/drivers/dri/i965/brw_gs.h | 2 +- src/mesa/drivers/dri/i965/brw_sf.c | 2 +- src/mesa/drivers/dri/i965/brw_sf.h | 2 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 24 ++-- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 +- src/mesa/drivers/dri/i965/brw_vs.c | 6 +- src/mesa/drivers/dri/i965/brw_vs.h | 2 +- src/mesa/drivers/dri/i965/brw_wm.c | 4 +- src/mesa/drivers/dri/i965/brw_wm.h | 2 +- src/mesa/drivers/dri/i965/test_eu_compact.c | 28 ++--- 29 files changed, 246 insertions(+), 246 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=a85c4c9b3f75cac9ab133caa91a40eec2e4816ae From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965/device_info: Add a supports_simd16_3src flag Message-ID: 
<20150422230135.E421776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 38dc2ddab4a25398ebd07e1ecf52daa7466b6963 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=38dc2ddab4a25398ebd07e1ecf52daa7466b6963 Author: Jason Ekstrand Date: Thu Apr 16 17:52:03 2015 -0700 i965/device_info: Add a supports_simd16_3src flag This also involves moving revision checking to screen creation time and passing that into brw_get_device_info so that we can get the right device_info for early versions of SKL. Since the only place we used revision was to check for SIMD16 3-src instruction support, it's safe to remove the revision field from brw_context. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.c | 24 -------------------- src/mesa/drivers/dri/i965/brw_context.h | 4 ---- src/mesa/drivers/dri/i965/brw_device_info.c | 29 +++++++++++++++++++----- src/mesa/drivers/dri/i965/brw_device_info.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 25 +++++--------------- src/mesa/drivers/dri/i965/intel_screen.c | 26 ++++++++++++++++++++- 6 files changed, 56 insertions(+), 55 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index c7e1e81..9d90360 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -669,29 +669,6 @@ brw_process_driconf_options(struct brw_context *brw) driQueryOptionb(options, "allow_glsl_extension_directive_midshader"); } -/* drop when libdrm 2.4.61 is released */ -#ifndef I915_PARAM_REVISION -#define I915_PARAM_REVISION 32 -#endif - -static int -brw_get_revision(int fd) -{ - struct drm_i915_getparam gp; - int revision; - int ret; - - memset(&gp, 0, sizeof(gp)); - gp.param = I915_PARAM_REVISION; - gp.value = &revision; - - ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); - if (ret) - revision = -1; - - return revision; -} - GLboolean brwCreateContext(gl_api api, const struct gl_config *mesaVis, @@ -750,7 +727,6 
@@ brwCreateContext(gl_api api, brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug; brw->needs_unlit_centroid_workaround = devinfo->needs_unlit_centroid_workaround; - brw->revision = brw_get_revision(sPriv->fd); brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil; brw->has_swizzling = screen->hw_has_swizzling; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 80556ea..3707fd0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1083,10 +1083,6 @@ struct brw_context int gen; int gt; - /* GT revision. This will be -1 if the revision couldn't be determined (eg, - * if the kernel doesn't support the query). - */ - int revision; bool is_g4x; bool is_baytrail; diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index bdbe78f..928bf99 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -166,7 +166,8 @@ static const struct brw_device_info brw_device_info_byt = { #define HSW_FEATURES \ GEN7_FEATURES, \ - .is_haswell = true + .is_haswell = true, \ + .supports_simd16_3src = true static const struct brw_device_info brw_device_info_hsw_gt1 = { HSW_FEATURES, .gt = 1, @@ -225,6 +226,7 @@ static const struct brw_device_info brw_device_info_hsw_gt3 = { .must_use_separate_stencil = true, \ .has_llc = true, \ .has_pln = true, \ + .supports_simd16_3src = true, \ .max_vs_threads = 504, \ .max_hs_threads = 504, \ .max_ds_threads = 504, \ @@ -305,27 +307,42 @@ static const struct brw_device_info brw_device_info_chv = { .max_gs_entries = 640, \ } +static const struct brw_device_info brw_device_info_skl_early = { + GEN9_FEATURES, .gt = 1, + .supports_simd16_3src = false, +}; + static const struct brw_device_info brw_device_info_skl_gt1 = { - GEN9_FEATURES, .gt = 1 + GEN9_FEATURES, .gt = 1, + .supports_simd16_3src = true, }; static const struct brw_device_info 
brw_device_info_skl_gt2 = { - GEN9_FEATURES, .gt = 2 + GEN9_FEATURES, .gt = 2, + .supports_simd16_3src = true, }; static const struct brw_device_info brw_device_info_skl_gt3 = { - GEN9_FEATURES, .gt = 3 + GEN9_FEATURES, .gt = 3, + .supports_simd16_3src = true, }; const struct brw_device_info * -brw_get_device_info(int devid) +brw_get_device_info(int devid, int revision) { + const struct brw_device_info *devinfo; switch (devid) { #undef CHIPSET -#define CHIPSET(id, family, name) case id: return &brw_device_info_##family; +#define CHIPSET(id, family, name) \ + case id: devinfo = &brw_device_info_##family; break; #include "pci_ids/i965_pci_ids.h" default: fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); return NULL; } + + if (devinfo->gen == 9 && (revision == 2 || revision == 3 || revision == -1)) + return &brw_device_info_skl_early; + + return devinfo; } diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h index 7c9f5d0..b921c2b 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -44,6 +44,7 @@ struct brw_device_info bool has_pln; bool has_compr4; bool has_surface_tile_offset; + bool supports_simd16_3src; /** * Quirks: @@ -82,4 +83,4 @@ struct brw_device_info /** @} */ }; -const struct brw_device_info *brw_get_device_info(int devid); +const struct brw_device_info *brw_get_device_info(int devid, int revision); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index baa83a1..1ef2d1d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1534,24 +1534,11 @@ fs_generator::enable_debug(const char *shader_name) this->shader_name = shader_name; } -/** - * Some hardware doesn't support SIMD16 instructions with 3 sources. 
- */ -static bool -brw_supports_simd16_3src(const struct brw_context *brw) -{ - /* WaDisableSIMD16On3SrcInstr: 3-source instructions don't work in SIMD16 - * on a few steppings of Skylake. - */ - if (brw->gen == 9) - return brw->revision != 2 && brw->revision != 3 && brw->revision != -1; - - return brw->is_haswell || brw->gen >= 8; -} - int fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) { + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; + /* align to 64 byte boundary. */ while (p->next_insn_offset % 64) brw_NOP(p); @@ -1647,7 +1634,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); - if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_inst *f = brw_MAD(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); @@ -1669,7 +1656,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_set_default_access_mode(p, BRW_ALIGN_16); - if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_inst *f = brw_LRP(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); @@ -1808,7 +1795,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_set_default_access_mode(p, BRW_ALIGN_16); - if (dispatch_width == 16 && !brw_supports_simd16_3src(brw)) { + if (dispatch_width == 16 && !devinfo->supports_simd16_3src) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); 
brw_BFE(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); @@ -1851,7 +1838,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) * do for the other three-source instructions. */ if (dispatch_width == 16 && - (brw->is_haswell || !brw_supports_simd16_3src(brw))) { + (brw->is_haswell || !devinfo->supports_simd16_3src)) { brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_BFI2(p, firsthalf(dst), firsthalf(src[0]), firsthalf(src[1]), firsthalf(src[2])); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 5a9207a..f5e15a0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1304,6 +1304,29 @@ set_max_gl_versions(struct intel_screen *screen) } } +/* drop when libdrm 2.4.61 is released */ +#ifndef I915_PARAM_REVISION +#define I915_PARAM_REVISION 32 +#endif + +static int +brw_get_revision(int fd) +{ + struct drm_i915_getparam gp; + int revision; + int ret; + + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_REVISION; + gp.value = &revision; + + ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) + revision = -1; + + return revision; +} + /** * This is the driver specific part of the createNewScreen entry point. * Called when using DRI2. 
@@ -1340,7 +1363,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return false; intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr); - intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID); + intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID, + brw_get_revision(psp->fd)); if (!intelScreen->devinfo) return false; From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Remove remaining uses of ctx->Const.UniformBooleanTrue in visitors Message-ID: <20150422230136.1565476102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 73bf8f3d6b6b571175c5ce324b44fef26915875f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=73bf8f3d6b6b571175c5ce324b44fef26915875f Author: Jason Ekstrand Date: Wed Apr 15 17:39:25 2015 -0700 i965: Remove remaining uses of ctx->Const.UniformBooleanTrue in visitors Since commit 2881b123, we have used 0/~0 for representing booleans on all gens. However, we still had a bunch of places in the visitor code where we were still referring to ctx->Const.UniformBooleanTrue. Since this is always ~0, we can just remove them. Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +--- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 +++++--------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2b8dfe4..4d1afb1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2639,9 +2639,7 @@ fs_visitor::visit(ir_constant *ir) emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); break; case GLSL_TYPE_BOOL: - emit(MOV(dst_reg, - fs_reg(ir->value.b[i] != 0 ? (int)ctx->Const.UniformBooleanTrue - : 0))); + emit(MOV(dst_reg, fs_reg(ir->value.b[i] != 0 ? 
~0 : 0))); break; default: unreachable("Non-float/uint/int/bool constant"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 20e6109..c9a496e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1635,7 +1635,7 @@ vec4_visitor::visit(ir_expression *ir) ir->operands[1]->type->is_vector()) { emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg((int)ctx->Const.UniformBooleanTrue))); + inst = emit(MOV(result_dst, src_reg(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); @@ -1653,7 +1653,7 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg((int)ctx->Const.UniformBooleanTrue))); + inst = emit(MOV(result_dst, src_reg(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; } else { emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); @@ -1667,7 +1667,7 @@ vec4_visitor::visit(ir_expression *ir) emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg((int)ctx->Const.UniformBooleanTrue))); + inst = emit(MOV(result_dst, src_reg(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; break; @@ -1862,9 +1862,7 @@ vec4_visitor::visit(ir_expression *ir) const_offset % 16 / 4, const_offset % 16 / 4); - /* UBO bools are any nonzero int. We need to convert them to use the - * value of true stored in ctx->Const.UniformBooleanTrue. - */ + /* UBO bools are any nonzero int. We need to convert them to 0/~0. 
*/ if (ir->type->base_type == GLSL_TYPE_BOOL) { emit(CMP(result_dst, packed_consts, src_reg(0u), BRW_CONDITIONAL_NZ)); @@ -2370,9 +2368,7 @@ vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) emit(MOV(*dst, src_reg(ir->value.u[i]))); break; case GLSL_TYPE_BOOL: - emit(MOV(*dst, - src_reg(ir->value.b[i] != 0 ? (int)ctx->Const.UniformBooleanTrue - : 0))); + emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? ~0 : 0))); break; default: unreachable("Non-float/uint/int/bool constant"); From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Add a brw_compiler structure and store the register sets in it Message-ID: <20150422230136.6AC9776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ae3870df7043861632aa553e12cc9284a9aef827 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ae3870df7043861632aa553e12cc9284a9aef827 Author: Jason Ekstrand Date: Thu Apr 16 12:01:09 2015 -0700 i965: Add a brw_compiler structure and store the register sets in it Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.h | 4 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 54 +++++++++---------- src/mesa/drivers/dri/i965/brw_shader.cpp | 13 +++++ src/mesa/drivers/dri/i965/brw_shader.h | 55 ++++++++++++++++++++ .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 38 +++++++------- src/mesa/drivers/dri/i965/intel_screen.c | 5 +- src/mesa/drivers/dri/i965/intel_screen.h | 48 +---------------- 7 files changed, 120 insertions(+), 97 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5724932..f79729b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1613,10 +1613,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct intel_screen 
*screen); +void brw_fs_alloc_reg_sets(struct brw_compiler *compiler); /* brw_vec4_reg_allocate.cpp */ -void brw_vec4_alloc_reg_set(struct intel_screen *screen); +void brw_vec4_alloc_reg_set(struct brw_compiler *compiler); /* brw_disasm.c */ int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 92d29a4..dc433b0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -71,9 +71,9 @@ fs_visitor::assign_regs_trivial() } static void -brw_alloc_reg_set(struct intel_screen *screen, int reg_width) +brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) { - const struct brw_device_info *devinfo = screen->devinfo; + const struct brw_device_info *devinfo = compiler->devinfo; int base_reg_count = BRW_MAX_GRF; int index = reg_width - 1; @@ -112,9 +112,9 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) class_sizes[class_count++] = 8; } - memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0, - sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range)); - int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range; + memset(compiler->fs_reg_sets[index].class_to_ra_reg_range, 0, + sizeof(compiler->fs_reg_sets[index].class_to_ra_reg_range)); + int *class_to_ra_reg_range = compiler->fs_reg_sets[index].class_to_ra_reg_range; /* Compute the total number of registers across all classes. 
*/ int ra_reg_count = 0; @@ -144,16 +144,16 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1]; } - uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(screen, ra_reg_count); + uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); + struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count); if (devinfo->gen >= 6) ra_set_allocate_round_robin(regs); - int *classes = ralloc_array(screen, int, class_count); + int *classes = ralloc_array(compiler, int, class_count); int aligned_pairs_class = -1; /* Allocate space for q values. We allocate class_count + 1 because we * want to leave room for the aligned pairs class if we have it. */ - unsigned int **q_values = ralloc_array(screen, unsigned int *, + unsigned int **q_values = ralloc_array(compiler, unsigned int *, class_count + 1); for (int i = 0; i < class_count + 1; ++i) q_values[i] = ralloc_array(q_values, unsigned int, class_count + 1); @@ -273,20 +273,20 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) ralloc_free(q_values); - screen->wm_reg_sets[index].regs = regs; - for (unsigned i = 0; i < ARRAY_SIZE(screen->wm_reg_sets[index].classes); i++) - screen->wm_reg_sets[index].classes[i] = -1; + compiler->fs_reg_sets[index].regs = regs; + for (unsigned i = 0; i < ARRAY_SIZE(compiler->fs_reg_sets[index].classes); i++) + compiler->fs_reg_sets[index].classes[i] = -1; for (int i = 0; i < class_count; i++) - screen->wm_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; - screen->wm_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; - screen->wm_reg_sets[index].aligned_pairs_class = aligned_pairs_class; + compiler->fs_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; + compiler->fs_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; + compiler->fs_reg_sets[index].aligned_pairs_class = aligned_pairs_class; } void -brw_fs_alloc_reg_sets(struct intel_screen 
*screen) +brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { - brw_alloc_reg_set(screen, 1); - brw_alloc_reg_set(screen, 2); + brw_alloc_reg_set(compiler, 1); + brw_alloc_reg_set(compiler, 2); } static int @@ -524,7 +524,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) bool fs_visitor::assign_regs(bool allow_spilling) { - struct intel_screen *screen = brw->intelScreen; + struct brw_compiler *compiler = brw->intelScreen->compiler; /* Most of this allocation was written for a reg_width of 1 * (dispatch_width == 8). In extending to SIMD16, the code was * left in place and it was converted to have the hardware @@ -534,7 +534,7 @@ fs_visitor::assign_regs(bool allow_spilling) int reg_width = dispatch_width / 8; unsigned hw_reg_mapping[this->alloc.count]; int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width); - int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */ + int rsi = reg_width - 1; /* Which compiler->fs_reg_sets[] to use */ calculate_live_intervals(); int node_count = this->alloc.count; @@ -544,15 +544,15 @@ fs_visitor::assign_regs(bool allow_spilling) if (devinfo->gen >= 7) node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START; struct ra_graph *g = - ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count); + ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); for (unsigned i = 0; i < this->alloc.count; i++) { unsigned size = this->alloc.sizes[i]; int c; - assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) && + assert(size <= ARRAY_SIZE(compiler->fs_reg_sets[rsi].classes) && "Register allocation relies on split_virtual_grfs()"); - c = screen->wm_reg_sets[rsi].classes[size - 1]; + c = compiler->fs_reg_sets[rsi].classes[size - 1]; /* Special case: on pre-GEN6 hardware that supports PLN, the * second operand of a PLN instruction needs to be an @@ -563,10 +563,10 @@ fs_visitor::assign_regs(bool allow_spilling) * any other interpolation modes). 
So all we need to do is find * that register and set it to the appropriate class. */ - if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 && + if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 && this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF && this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { - c = screen->wm_reg_sets[rsi].aligned_pairs_class; + c = compiler->fs_reg_sets[rsi].aligned_pairs_class; } ra_set_node_class(g, i, c); @@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling) */ if (inst->eot) { int size = alloc.sizes[inst->src[0].reg]; - int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1; + int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; ra_set_node_reg(g, inst->src[0].reg, reg); break; } @@ -663,7 +663,7 @@ fs_visitor::assign_regs(bool allow_spilling) for (unsigned i = 0; i < this->alloc.count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg]; + hw_reg_mapping[i] = compiler->fs_reg_sets[rsi].ra_reg_to_grf[reg]; this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->alloc.sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2e34057..79f0e1c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -32,6 +32,19 @@ #include "glsl/glsl_parser_extras.h" #include "main/shaderapi.h" +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) +{ + struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); + + compiler->devinfo = devinfo; + + brw_fs_alloc_reg_sets(compiler); + brw_vec4_alloc_reg_set(compiler); + + return compiler; +} + struct gl_shader * brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 72e8be5..ac4e62a 100644 --- 
a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -36,6 +36,58 @@ #define MAX_SAMPLER_MESSAGE_SIZE 11 #define MAX_VGRF_SIZE 16 +struct brw_compiler { + const struct brw_device_info *devinfo; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + } vec4_reg_set; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used, indexed by register size. + */ + int classes[16]; + + /** + * Mapping from classes to ra_reg ranges. Each of the per-size + * classes corresponds to a range of ra_reg nodes. This array stores + * those ranges in the form of first ra_reg in each class and the + * total number of ra_reg elements in the last array element. This + * way the range of the i'th class is given by: + * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) + */ + int class_to_ra_reg_range[17]; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. 
+ */ + int aligned_pairs_class; + } fs_reg_sets[2]; +}; + enum PACKED register_file { BAD_FILE, GRF, @@ -223,6 +275,9 @@ bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg); extern "C" { #endif +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 3f2bb05..5368a75 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -91,10 +91,10 @@ vec4_visitor::reg_allocate_trivial() } extern "C" void -brw_vec4_alloc_reg_set(struct intel_screen *screen) +brw_vec4_alloc_reg_set(struct brw_compiler *compiler) { int base_reg_count = - screen->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; + compiler->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; /* After running split_virtual_grfs(), almost all VGRFs will be of size 1. 
* SEND-from-GRF sources cannot be split, so we also need classes for each @@ -112,14 +112,14 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - ralloc_free(screen->vec4_reg_set.ra_reg_to_grf); - screen->vec4_reg_set.ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); - ralloc_free(screen->vec4_reg_set.regs); - screen->vec4_reg_set.regs = ra_alloc_reg_set(screen, ra_reg_count); - if (screen->devinfo->gen >= 6) - ra_set_allocate_round_robin(screen->vec4_reg_set.regs); - ralloc_free(screen->vec4_reg_set.classes); - screen->vec4_reg_set.classes = ralloc_array(screen, int, class_count); + ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf); + compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); + ralloc_free(compiler->vec4_reg_set.regs); + compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count); + if (compiler->devinfo->gen >= 6) + ra_set_allocate_round_robin(compiler->vec4_reg_set.regs); + ralloc_free(compiler->vec4_reg_set.classes); + compiler->vec4_reg_set.classes = ralloc_array(compiler, int, class_count); /* Now, add the registers to their classes, and add the conflicts * between them and the base GRF registers (and also each other). 
@@ -128,19 +128,19 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) unsigned *q_values[MAX_VGRF_SIZE]; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); - screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs); + compiler->vec4_reg_set.classes[i] = ra_alloc_reg_class(compiler->vec4_reg_set.regs); q_values[i] = new unsigned[MAX_VGRF_SIZE]; for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg); + ra_class_add_reg(compiler->vec4_reg_set.regs, compiler->vec4_reg_set.classes[i], reg); - screen->vec4_reg_set.ra_reg_to_grf[reg] = j; + compiler->vec4_reg_set.ra_reg_to_grf[reg] = j; for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(screen->vec4_reg_set.regs, base_reg, reg); + ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); } reg++; @@ -158,7 +158,7 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) } assert(reg == ra_reg_count); - ra_set_finalize(screen->vec4_reg_set.regs, q_values); + ra_set_finalize(compiler->vec4_reg_set.regs, q_values); for (int i = 0; i < MAX_VGRF_SIZE; i++) delete[] q_values[i]; @@ -191,7 +191,7 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { - struct intel_screen *screen = brw->intelScreen; + struct brw_compiler *compiler = brw->intelScreen->compiler; unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; @@ -207,12 +207,12 @@ vec4_visitor::reg_allocate() int first_payload_node = node_count; node_count += payload_reg_count; struct ra_graph *g = - ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count); + ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count); for (unsigned i = 0; i < alloc.count; i++) { int size = this->alloc.sizes[i]; assert(size >= 1 && size <= MAX_VGRF_SIZE); - ra_set_node_class(g, 
i, screen->vec4_reg_set.classes[size - 1]); + ra_set_node_class(g, i, compiler->vec4_reg_set.classes[size - 1]); for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { @@ -248,7 +248,7 @@ vec4_visitor::reg_allocate() for (unsigned i = 0; i < alloc.count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg]; + hw_reg_mapping[i] = compiler->vec4_reg_set.ra_reg_to_grf[reg]; prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + alloc.sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f5e15a0..92e638f 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -38,6 +38,7 @@ #include "main/version.h" #include "swrast/s_renderbuffer.h" #include "util/ralloc.h" +#include "brw_shader.h" #include "utils.h" #include "xmlpool.h" @@ -1406,8 +1407,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) psp->extensions = !intelScreen->has_context_reset_notification ? intelScreenExtensions : intelRobustScreenExtensions; - brw_fs_alloc_reg_sets(intelScreen); - brw_vec4_alloc_reg_set(intelScreen); + intelScreen->compiler = brw_compiler_create(intelScreen, + intelScreen->devinfo); return (const __DRIconfig**) intel_screen_make_configs(psp); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 393315e..f814ed0 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -63,53 +63,7 @@ struct intel_screen int winsys_msaa_samples_override; - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. 
- */ - uint8_t *ra_reg_to_grf; - } vec4_reg_set; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping from classes to ra_reg ranges. Each of the per-size - * classes corresponds to a range of ra_reg nodes. This array stores - * those ranges in the form of first ra_reg in each class and the - * total number of ra_reg elements in the last array element. This - * way the range of the i'th class is given by: - * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) - */ - int class_to_ra_reg_range[17]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } wm_reg_sets[2]; + struct brw_compiler *compiler; /** * Configuration cache with default values for all contexts From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): mesa: remove the gl_sl_pragmas structure Message-ID: <20150422230136.7D58976102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1948880720a631f959c6fa7f5bc533f26619a31a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1948880720a631f959c6fa7f5bc533f26619a31a Author: Jason Ekstrand Date: Wed Apr 22 09:30:30 2015 -0700 mesa: remove the gl_sl_pragmas structure This code was added by Brian Paul in 2009 but, as far as Matt and I can tell, it's been dead ever since the new GLSL compiler was added. 
Reviewed-by: Brian Paul --- src/glsl/standalone_scaffolding.cpp | 3 --- src/mesa/main/mtypes.h | 13 ------------- src/mesa/main/shaderapi.c | 6 ------ 3 files changed, 22 deletions(-) diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index 6f5a27f..a109c4e 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -189,9 +189,6 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) options.MaxUnrollIterations = 32; options.MaxIfDepth = UINT_MAX; - /* Default pragma settings */ - options.DefaultPragmas.Optimize = true; - for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh) memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 1c751cf..fb41430 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2365,16 +2365,6 @@ struct gl_ati_fragment_shader_state }; -/** Set by #pragma directives */ -struct gl_sl_pragmas -{ - GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */ - GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */ - GLboolean Optimize; /**< defaults on */ - GLboolean Debug; /**< defaults off */ -}; - - /** * A GLSL vertex or fragment shader object. 
*/ @@ -2397,7 +2387,6 @@ struct gl_shader struct gl_program *Program; /**< Post-compile assembly code */ GLchar *InfoLog; - struct gl_sl_pragmas Pragmas; unsigned Version; /**< GLSL version used for linking */ @@ -2903,8 +2892,6 @@ struct gl_shader_compiler_options */ GLboolean OptimizeForAOS; - struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */ - const struct nir_shader_compiler_options *NirOptions; }; diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 77e2b87..cc001ba 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -115,9 +115,6 @@ _mesa_init_shader_state(struct gl_context *ctx) options.MaxUnrollIterations = 32; options.MaxIfDepth = UINT_MAX; - /* Default pragma settings */ - options.DefaultPragmas.Optimize = GL_TRUE; - for (sh = 0; sh < MESA_SHADER_STAGES; ++sh) memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); @@ -872,9 +869,6 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj) options = &ctx->Const.ShaderCompilerOptions[sh->Stage]; - /* set default pragma state for shader */ - sh->Pragmas = options->DefaultPragmas; - if (!sh->Source) { /* If the user called glCompileShader without first calling * glShaderSource, we should fail to compile, but not raise a GL_ERROR. 
From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Make the brw_inst helpers take a device_info instead of a context Message-ID: <20150422230135.85CC476102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4e9c79c847c81701300b5b0d97d85dcfad32239a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e9c79c847c81701300b5b0d97d85dcfad32239a Author: Jason Ekstrand Date: Tue Apr 14 18:00:06 2015 -0700 i965: Make the brw_inst helpers take a device_info instead of a context Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_clip_line.c | 21 +- src/mesa/drivers/dri/i965/brw_clip_tri.c | 45 +- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 26 +- src/mesa/drivers/dri/i965/brw_clip_util.c | 2 +- src/mesa/drivers/dri/i965/brw_disasm.c | 472 +++++----- src/mesa/drivers/dri/i965/brw_eu.c | 28 +- src/mesa/drivers/dri/i965/brw_eu.h | 10 +- src/mesa/drivers/dri/i965/brw_eu_compact.c | 123 +-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 998 +++++++++++----------- src/mesa/drivers/dri/i965/brw_ff_gs_emit.c | 7 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 54 +- src/mesa/drivers/dri/i965/brw_inst.h | 140 +-- src/mesa/drivers/dri/i965/brw_reg.h | 4 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 7 +- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 28 +- src/mesa/drivers/dri/i965/test_eu_compact.c | 36 +- 16 files changed, 1006 insertions(+), 995 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=4e9c79c847c81701300b5b0d97d85dcfad32239a From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:35 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:35 -0700 (PDT) Subject: Mesa (master): i965: Make the annotation code take a device_info instead of a context Message-ID: <20150422230135.CAE3176102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 
9c89e47806ee0437a2617eb4b90a0b953869fea2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c89e47806ee0437a2617eb4b90a0b953869fea2 Author: Jason Ekstrand Date: Wed Apr 15 15:01:25 2015 -0700 i965: Make the annotation code take a device_info instead of a context Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +++-- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 5 +++-- src/mesa/drivers/dri/i965/intel_asm_annotation.c | 9 +++++---- src/mesa/drivers/dri/i965/intel_asm_annotation.h | 5 +++-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 539619a..baa83a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1573,7 +1573,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) bool multiple_instructions_emitted = false; if (unlikely(debug_flag)) - annotate(brw, &annotation, cfg, inst, p->next_insn_offset); + annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < inst->sources; i++) { src[i] = brw_reg_from_fs_reg(&inst->src[i]); @@ -2128,7 +2128,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) spill_count, fill_count, promoted_constants, before_size, after_size, 100.0f * (before_size - after_size) / before_size); - dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog); + dump_assembly(p->store, annotation.ann_count, annotation.ann, + p->devinfo, prog); ralloc_free(annotation.ann); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 94ab32d..6e3a6a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1151,7 +1151,7 @@ vec4_generator::generate_code(const cfg_t *cfg) struct brw_reg src[3], dst; if (unlikely(debug_flag)) - 
annotate(brw, &annotation, cfg, inst, p->next_insn_offset); + annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); @@ -1618,7 +1618,8 @@ vec4_generator::generate_code(const cfg_t *cfg) before_size / 16, loop_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); - dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog); + dump_assembly(p->store, annotation.ann_count, annotation.ann, + p->devinfo, prog); ralloc_free(annotation.ann); } diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index b4a693f..bb8bb8d 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -33,7 +33,8 @@ void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, - struct brw_context *brw, const struct gl_program *prog) + const struct brw_device_info *devinfo, + const struct gl_program *prog) { const char *last_annotation_string = NULL; const void *last_annotation_ir = NULL; @@ -79,7 +80,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation fprintf(stderr, " %s\n", last_annotation_string); } - brw_disassemble(brw->intelScreen->devinfo, assembly, start_offset, end_offset, stderr); + brw_disassemble(devinfo, assembly, start_offset, end_offset, stderr); if (annotation[i].block_end) { fprintf(stderr, " END B%d", annotation[i].block_end->num); @@ -94,7 +95,7 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation fprintf(stderr, "\n"); } -void annotate(struct brw_context *brw, +void annotate(const struct brw_device_info *devinfo, struct annotation_info *annotation, const struct cfg_t *cfg, struct backend_instruction *inst, unsigned offset) { @@ -129,7 +130,7 @@ void annotate(struct brw_context *brw, * There's also only complication from emitting an 
annotation without * a corresponding hardware instruction to disassemble. */ - if (brw->gen >= 6 && inst->opcode == BRW_OPCODE_DO) { + if (devinfo->gen >= 6 && inst->opcode == BRW_OPCODE_DO) { annotation->ann_count--; } diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h index d80f320..d9c69bc 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h @@ -60,10 +60,11 @@ struct annotation_info { void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, - struct brw_context *brw, const struct gl_program *prog); + const struct brw_device_info *devinfo, + const struct gl_program *prog); void -annotate(struct brw_context *brw, +annotate(const struct brw_device_info *devinfo, struct annotation_info *annotation, const struct cfg_t *cfg, struct backend_instruction *inst, unsigned offset); void From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965/vec4: Add a devinfo field to the generator and use it for gen checks Message-ID: <20150422230136.07C5E76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2bf207b47347ec1c672448e3019029f899a5d3b5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2bf207b47347ec1c672448e3019029f899a5d3b5 Author: Jason Ekstrand Date: Thu Apr 16 10:30:05 2015 -0700 i965/vec4: Add a devinfo field to the generator and use it for gen checks Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 87 ++++++++++------------ 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index a0ee2cc..cafbb64 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -514,6 
+514,7 @@ private: struct brw_reg surf_index); struct brw_context *brw; + const struct brw_device_info *devinfo; struct brw_compile *p; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 6e3a6a5..3b62440 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -142,7 +142,8 @@ vec4_generator::vec4_generator(struct brw_context *brw, bool debug_flag, const char *stage_name, const char *stage_abbrev) - : brw(brw), shader_prog(shader_prog), prog(prog), prog_data(prog_data), + : brw(brw), devinfo(brw->intelScreen->devinfo), + shader_prog(shader_prog), prog(prog), prog_data(prog_data), mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), debug_flag(debug_flag) { @@ -235,7 +236,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, { int msg_type = -1; - if (brw->gen >= 5) { + if (devinfo->gen >= 5) { switch (inst->opcode) { case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXL: @@ -248,7 +249,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, case SHADER_OPCODE_TXD: if (inst->shadow_compare) { /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). 
*/ - assert(brw->gen >= 8 || brw->is_haswell); + assert(devinfo->gen >= 8 || devinfo->is_haswell); msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; @@ -258,13 +259,13 @@ vec4_generator::generate_tex(vec4_instruction *inst, msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_CMS: - if (brw->gen >= 7) + if (devinfo->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; else msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; case SHADER_OPCODE_TXF_MCS: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS; break; case SHADER_OPCODE_TXS: @@ -326,7 +327,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, * use an implied move from g0 to the first message register. */ if (inst->header_present) { - if (brw->gen < 6 && !inst->offset) { + if (devinfo->gen < 6 && !inst->offset) { /* Set up an implied move from g0 to the MRF. */ src = brw_vec8_grf(0, 0); } else { @@ -345,7 +346,7 @@ vec4_generator::generate_tex(vec4_instruction *inst, /* Set the texel offset bits in DWord 2. */ dw2 = inst->offset; - if (brw->gen >= 9) + if (devinfo->gen >= 9) /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D, * based on bit 22 in the header. */ @@ -504,7 +505,7 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst) inst->base_mrf, /* starting mrf reg nr */ src, BRW_URB_WRITE_EOT | inst->urb_write_flags, - brw->gen >= 8 ? 2 : 1,/* message len */ + devinfo->gen >= 8 ? 
2 : 1,/* message len */ 0, /* response len */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); @@ -536,7 +537,7 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - assert(brw->gen >= 7 && + assert(devinfo->gen >= 7 && src1.file == BRW_IMMEDIATE_VALUE && src1.type == BRW_REGISTER_TYPE_UD && src1.dw1.ud <= USHRT_MAX); @@ -553,7 +554,7 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - if (brw->gen >= 8) { + if (devinfo->gen >= 8) { /* Move the vertex count into the second MRF for the EOT write. */ brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD), src); @@ -824,7 +825,7 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, { int second_vertex_offset; - if (brw->gen >= 6) + if (devinfo->gen >= 6) second_vertex_offset = 1; else second_vertex_offset = 16; @@ -887,9 +888,9 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, uint32_t msg_type; - if (brw->gen >= 6) + if (devinfo->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - else if (brw->gen == 5 || brw->is_g4x) + else if (devinfo->gen == 5 || devinfo->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; @@ -900,7 +901,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); - if (brw->gen < 6) + if (devinfo->gen < 6) brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, 255, /* binding table index: stateless access */ @@ -937,9 +938,9 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, uint32_t msg_type; - if (brw->gen >= 7) + if (devinfo->gen >= 7) 
msg_type = GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE; - else if (brw->gen == 6) + else if (devinfo->gen == 6) msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; @@ -951,7 +952,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, * guaranteed and write commits only matter for inter-thread * synchronization. */ - if (brw->gen >= 6) { + if (devinfo->gen >= 6) { write_commit = false; } else { /* The visitor set up our destination register to be g0. This @@ -971,7 +972,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); - if (brw->gen < 6) + if (devinfo->gen < 6) brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); brw_set_dp_write_message(p, send, 255, /* binding table index: stateless access */ @@ -1004,9 +1005,9 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, uint32_t msg_type; - if (brw->gen >= 6) + if (devinfo->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - else if (brw->gen == 5 || brw->is_g4x) + else if (devinfo->gen == 5 || devinfo->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; @@ -1017,7 +1018,7 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); - if (brw->gen < 6) + if (devinfo->gen < 6) brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, surf_index, @@ -1208,7 +1209,7 @@ vec4_generator::generate_code(const cfg_t *cfg) break; case BRW_OPCODE_MAD: - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; @@ -1271,47 +1272,47 @@ vec4_generator::generate_code(const cfg_t *cfg) break; case 
BRW_OPCODE_F32TO16: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: - assert(brw->gen >= 6); + assert(devinfo->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); /* CBIT only supports UD type for dst. 
*/ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: @@ -1319,23 +1320,23 @@ vec4_generator::generate_code(const cfg_t *cfg) break; case BRW_OPCODE_BFE: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: - assert(brw->gen >= 7); + assert(devinfo->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ - assert(brw->gen == 6); + assert(devinfo->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); @@ -1376,10 +1377,10 @@ vec4_generator::generate_code(const cfg_t *cfg) case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); - } else if (brw->gen == 6) { + } else if (devinfo->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); } else { generate_math1_gen4(inst, dst, src[0]); @@ -1390,9 +1391,9 @@ vec4_generator::generate_code(const cfg_t *cfg) case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (brw->gen >= 7) { + if (devinfo->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); - } else if (brw->gen == 6) { + } else if (devinfo->gen == 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); 
@@ -1569,13 +1570,7 @@ vec4_generator::generate_code(const cfg_t *cfg) } default: - if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { - _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", - opcode_descs[inst->opcode].name); - } else { - _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); - } - abort(); + unreachable("Unsupported opcode"); } if (inst->opcode == VEC4_OPCODE_PACK_BYTES) { From jekstrand at kemper.freedesktop.org Wed Apr 22 23:01:36 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 16:01:36 -0700 (PDT) Subject: Mesa (master): i965: Add a devinfo field to backend_visitor and use it for gen checks Message-ID: <20150422230136.2DD3B76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 28e9601d0e681411b60a7de8be9f401b0df77d29 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=28e9601d0e681411b60a7de8be9f401b0df77d29 Author: Jason Ekstrand Date: Wed Apr 15 18:00:05 2015 -0700 i965: Add a devinfo field to backend_visitor and use it for gen checks Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 72 ++++++------ .../drivers/dri/i965/brw_fs_combine_constants.cpp | 10 +- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 10 +- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 42 +++---- .../dri/i965/brw_fs_peephole_predicated_break.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 +- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 108 +++++++++--------- src/mesa/drivers/dri/i965/brw_ir_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_ir_vec4.h | 2 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +- src/mesa/drivers/dri/i965/brw_shader.h | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 22 ++-- .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 20 ++-- .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 6 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 4 
+- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 120 ++++++++++---------- src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 4 +- 19 files changed, 225 insertions(+), 221 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=28e9601d0e681411b60a7de8be9f401b0df77d29 From airlied at kemper.freedesktop.org Thu Apr 23 00:11:43 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Wed, 22 Apr 2015 17:11:43 -0700 (PDT) Subject: Mesa (master): st/mesa: add ARB_texture_stencil8 support (v4) Message-ID: <20150423001143.B7AD276102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6cc49c4ce1dcb06528bfa2d6e650c26721355ae1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6cc49c4ce1dcb06528bfa2d6e650c26721355ae1 Author: Dave Airlie Date: Sun Apr 5 14:45:25 2015 +1000 st/mesa: add ARB_texture_stencil8 support (v4) if we support stencil texturing, enable texture_stencil8 there is no requirement to support native S8 for this, the texture can be converted to x24s8 fine. v2: fold fixes from Marek in: a) put S8 last in the list b) fix renderable to always test for d/s renderable fixup the texture case to use a stencil only format for picking the format for the texture view. 
v3: hit fallback for getteximage v4: put s8 back in front, it shouldn't get picked now (Ilia) Reviewed-by: Ilia Mirkin Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_atom_texture.c | 14 ++++++++++---- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_extensions.c | 3 +++ src/mesa/state_tracker/st_format.c | 19 ++++++++----------- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index eff28fc..04ba864 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -287,16 +287,22 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st, enum pipe_format format) { struct pipe_sampler_view **sv; - + const struct st_texture_image *firstImage; if (!stObj || !stObj->pt) { return NULL; } sv = st_texture_get_sampler_view(st, stObj); - if (stObj->base.StencilSampling && - util_format_is_depth_and_stencil(format)) - format = util_format_stencil_only(format); + if (util_format_is_depth_and_stencil(format)) { + if (stObj->base.StencilSampling) + format = util_format_stencil_only(format); + else { + firstImage = st_texture_image_const(_mesa_base_tex_image(&stObj->base)); + if (firstImage->base._BaseFormat == GL_STENCIL_INDEX) + format = util_format_stencil_only(format); + } + } /* if sampler view has changed dereference it */ if (*sv) { diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index bdf236e..7ea3846 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -950,7 +950,7 @@ st_GetTexImage(struct gl_context * ctx, /* XXX Fallback to _mesa_GetTexImage_sw for depth-stencil formats * due to an incomplete stencil blit implementation in some drivers.
*/ - if (format == GL_DEPTH_STENCIL) { + if (format == GL_DEPTH_STENCIL || format == GL_STENCIL_INDEX) { goto fallback; } diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index bc20f73..25932dd 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -650,6 +650,9 @@ void st_init_extensions(struct pipe_screen *screen, ARRAY_SIZE(vertex_mapping), PIPE_BUFFER, PIPE_BIND_VERTEX_BUFFER); + if (extensions->ARB_stencil_texturing) + extensions->ARB_texture_stencil8 = GL_TRUE; + /* Figure out GLSL support. */ glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL); diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 72dbf3b..181465d 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1942,11 +1942,6 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, GLint internalFormat, GLenum format, GLenum type) { - const boolean want_renderable = - internalFormat == 3 || internalFormat == 4 || - internalFormat == GL_RGB || internalFormat == GL_RGBA || - internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 || - internalFormat == GL_BGRA; struct st_context *st = st_context(ctx); enum pipe_format pFormat; unsigned bindings; @@ -1962,15 +1957,17 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, } /* GL textures may wind up being render targets, but we don't know - * that in advance. Specify potential render target flags now. + * that in advance. Specify potential render target flags now for formats + * that we know should always be renderable. 
*/ bindings = PIPE_BIND_SAMPLER_VIEW; - if (want_renderable) { - if (_mesa_is_depth_or_stencil_format(internalFormat)) - bindings |= PIPE_BIND_DEPTH_STENCIL; - else + if (_mesa_is_depth_or_stencil_format(internalFormat)) + bindings |= PIPE_BIND_DEPTH_STENCIL; + else if (internalFormat == 3 || internalFormat == 4 || + internalFormat == GL_RGB || internalFormat == GL_RGBA || + internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 || + internalFormat == GL_BGRA) bindings |= PIPE_BIND_RENDER_TARGET; - } /* GLES allows the driver to choose any format which matches * the format+type combo, because GLES only supports unsized internal From airlied at kemper.freedesktop.org Thu Apr 23 00:11:43 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Wed, 22 Apr 2015 17:11:43 -0700 (PDT) Subject: Mesa (master): docs: mark off texture_stencil8 (v2.1) Message-ID: <20150423001143.C221F76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 734bceed8609a74d83032380e83488f3d2497012 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=734bceed8609a74d83032380e83488f3d2497012 Author: Dave Airlie Date: Sun Apr 5 14:46:11 2015 +1000 docs: mark off texture_stencil8 (v2.1) copy drivers from the stencil_texturing list, softpipe is definitely broken for stencil texturing since it uses float, but I'll look at that later. 
v2.1: update relnotes Reviewed-by: Ilia Mirkin Signed-off-by: Dave Airlie --- docs/GL3.txt | 2 +- docs/relnotes/10.6.0.html | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 2dbd987..172fd3c 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -182,7 +182,7 @@ GL 4.4, GLSL 4.40: GL_ARB_multi_bind DONE (all drivers) GL_ARB_query_buffer_object not started GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) - GL_ARB_texture_stencil8 not started + GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL 4.5, GLSL 4.50: diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 82aea5c..48f76f9 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -54,6 +54,7 @@ Note: some of the new features are only available with certain drivers.
    • GL_EXT_draw_buffers2 on freedreno
    • GL_ARB_clip_control on i965
    • GL_ARB_program_interface_query (all drivers)
    • + GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe

    Bug fixes

    From airlied at kemper.freedesktop.org Thu Apr 23 00:11:43 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Wed, 22 Apr 2015 17:11:43 -0700 (PDT) Subject: Mesa (master): mesa: finish implementing ARB_texture_stencil8 (v5) Message-ID: <20150423001143.AC1B276102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 782e71cc078308dddd5d6f9505bff0cb8e67f455 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=782e71cc078308dddd5d6f9505bff0cb8e67f455 Author: Dave Airlie Date: Sun Apr 5 13:19:18 2015 +1000 mesa: finish implementing ARB_texture_stencil8 (v5) Parts of this were implemented previously, so finish it off. v2: fix getteximage falling into the integer check add fixes for the FBO paths, (fbo-stencil8 test). v3: fix getteximage path harder. v4: remove swapbytes from getteximage path (Ilia) v5: brown paper bag the swapbytes removal. (Ilia) Reviewed-by: Ilia Mirkin Signed-off-by: Dave Airlie --- src/mesa/main/extensions.c | 1 + src/mesa/main/fbobject.c | 9 +++++--- src/mesa/main/texgetimage.c | 50 ++++++++++++++++++++++++++++++++++++++++++- src/mesa/main/teximage.c | 3 ++- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 861b150..3d4965c 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -186,6 +186,7 @@ static const struct extension extension_table[] = { { "GL_ARB_texture_rectangle", o(NV_texture_rectangle), GL, 2004 }, { "GL_ARB_texture_rgb10_a2ui", o(ARB_texture_rgb10_a2ui), GL, 2009 }, { "GL_ARB_texture_rg", o(ARB_texture_rg), GL, 2008 }, + { "GL_ARB_texture_stencil8", o(ARB_texture_stencil8), GL, 2013 }, { "GL_ARB_texture_storage", o(dummy_true), GL, 2011 }, { "GL_ARB_texture_storage_multisample", o(ARB_texture_multisample), GL, 2012 }, { "GL_ARB_texture_view", o(ARB_texture_view), GL, 2012 }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8032585..27cf97f 100644 --- a/src/mesa/main/fbobject.c +++ 
b/src/mesa/main/fbobject.c @@ -813,8 +813,10 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format, if (ctx->Extensions.ARB_depth_texture && baseFormat == GL_DEPTH_STENCIL) { /* OK */ - } - else { + } else if (ctx->Extensions.ARB_texture_stencil8 && + baseFormat == GL_STENCIL_INDEX) { + /* OK */ + } else { /* no such thing as stencil-only textures */ att_incomplete("illegal stencil texture"); att->Complete = GL_FALSE; @@ -978,7 +980,8 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx, if (!is_format_color_renderable(ctx, attFormat, texImg->InternalFormat) && - !is_legal_depth_format(ctx, f)) { + !is_legal_depth_format(ctx, f) && + f != GL_STENCIL_INDEX) { fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT; fbo_incomplete(ctx, "texture attachment incomplete", -1); return; diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 908bb9b..92b4d67 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -175,6 +175,51 @@ get_tex_depth_stencil(struct gl_context *ctx, GLuint dimensions, } } +/** + * glGetTexImage for stencil pixels. 
+ */ +static void +get_tex_stencil(struct gl_context *ctx, GLuint dimensions, + GLenum format, GLenum type, GLvoid *pixels, + struct gl_texture_image *texImage) +{ + const GLint width = texImage->Width; + const GLint height = texImage->Height; + const GLint depth = texImage->Depth; + GLint img, row; + + assert(format == GL_STENCIL_INDEX); + + for (img = 0; img < depth; img++) { + GLubyte *srcMap; + GLint rowstride; + + /* map src texture buffer */ + ctx->Driver.MapTextureImage(ctx, texImage, img, + 0, 0, width, height, GL_MAP_READ_BIT, + &srcMap, &rowstride); + + if (srcMap) { + for (row = 0; row < height; row++) { + const GLubyte *src = srcMap + row * rowstride; + void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels, + width, height, format, type, + img, row, 0); + _mesa_unpack_ubyte_stencil_row(texImage->TexFormat, + width, + (const GLuint *) src, + dest); + } + + ctx->Driver.UnmapTextureImage(ctx, texImage, img); + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage"); + break; + } + } +} + /** * glGetTexImage for YCbCr pixels. 
@@ -684,6 +729,9 @@ _mesa_GetTexImage_sw(struct gl_context *ctx, else if (format == GL_DEPTH_STENCIL_EXT) { get_tex_depth_stencil(ctx, dimensions, format, type, pixels, texImage); } + else if (format == GL_STENCIL_INDEX) { + get_tex_stencil(ctx, dimensions, format, type, pixels, texImage); + } else if (format == GL_YCBCR_MESA) { get_tex_ycbcr(ctx, dimensions, format, type, pixels, texImage); } @@ -879,7 +927,7 @@ getteximage_error_check(struct gl_context *ctx, "glGetTex%sImage(format mismatch)", suffix); return GL_TRUE; } - else if (_mesa_is_enum_format_integer(format) != + else if (!_mesa_is_stencil_format(format) && _mesa_is_enum_format_integer(format) != _mesa_is_format_integer(texImage->TexFormat)) { _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTex%sImage(format mismatch)", suffix); diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 8d9d7cf..d07263c 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1997,7 +1997,8 @@ _mesa_legal_texture_base_format_for_target(struct gl_context *ctx, const char *caller) { if (_mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_COMPONENT - || _mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_STENCIL) { + || _mesa_base_tex_format(ctx, internalFormat) == GL_DEPTH_STENCIL + || _mesa_base_tex_format(ctx, internalFormat) == GL_STENCIL_INDEX) { /* Section 3.8.3 (Texture Image Specification) of the OpenGL 3.3 Core * Profile spec says: * From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir: Refactor tex_instr_dest_size to use a switch statement Message-ID: <20150423011107.5886776103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 94669cb53483bd58f33e439411af3cb5c006da79 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=94669cb53483bd58f33e439411af3cb5c006da79 Author: Jason Ekstrand Date: Thu Apr 9 21:03:02 2015 -0700 nir: Refactor 
tex_instr_dest_size to use a switch statement Reviewed-by: Connor Abbott --- src/glsl/nir/nir.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 74772c7..c07a955 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -960,7 +960,8 @@ typedef struct { static inline unsigned nir_tex_instr_dest_size(nir_tex_instr *instr) { - if (instr->op == nir_texop_txs) { + switch (instr->op) { + case nir_texop_txs: { unsigned ret; switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_1D: @@ -985,13 +986,15 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) return ret; } - if (instr->op == nir_texop_query_levels) + case nir_texop_query_levels: return 2; - if (instr->is_shadow && instr->is_new_style_shadow) - return 1; + default: + if (instr->is_shadow && instr->is_new_style_shadow) + return 1; - return 4; + return 4; + } } static inline unsigned From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/tex: Use the correct return size for query_levels and lod Message-ID: <20150423011107.637A776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 02f03fc0f111c484bd26497a85cbca7245400f68 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=02f03fc0f111c484bd26497a85cbca7245400f68 Author: Jason Ekstrand Date: Thu Apr 9 21:04:21 2015 -0700 nir/tex: Use the correct return size for query_levels and lod Reviewed-by: Connor Abbott --- src/glsl/nir/nir.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index c07a955..41e2120 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -986,9 +986,12 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) return ret; } - case nir_texop_query_levels: + case nir_texop_lod: return 2; + case nir_texop_query_levels: + return 1; + default: if (instr->is_shadow && 
instr->is_new_style_shadow) return 1; From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/print: Print the closing paren on load_const instructions Message-ID: <20150423011107.6D70576102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e79120afdc17c04143ef4d7fb71394a5053114a5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e79120afdc17c04143ef4d7fb71394a5053114a5 Author: Jason Ekstrand Date: Thu Apr 9 21:09:48 2015 -0700 nir/print: Print the closing paren on load_const instructions Reviewed-by: Connor Abbott --- src/glsl/nir/nir_print.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index fb8c934..eb4045c 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -533,6 +533,8 @@ print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp) fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); } + + fprintf(fp, ")"); } static void From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/lower_vars_to_ssa: Pass around the nir_shader instead of a void mem_ctx Message-ID: <20150423011107.82C9D76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ba887602022340c596a09534e61b6554e3aeb533 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba887602022340c596a09534e61b6554e3aeb533 Author: Jason Ekstrand Date: Fri Apr 10 14:43:28 2015 -0700 nir/lower_vars_to_ssa: Pass around the nir_shader instead of a void mem_ctx Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_vars_to_ssa.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index 
8b7261c..a844038 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -55,7 +55,7 @@ struct deref_node { }; struct lower_variables_state { - void *mem_ctx; + nir_shader *shader; void *dead_ctx; nir_function_impl *impl; @@ -112,12 +112,12 @@ type_get_length(const struct glsl_type *type) static struct deref_node * deref_node_create(struct deref_node *parent, - const struct glsl_type *type, void *mem_ctx) + const struct glsl_type *type, nir_shader *shader) { size_t size = sizeof(struct deref_node) + type_get_length(type) * sizeof(struct deref_node *); - struct deref_node *node = rzalloc_size(mem_ctx, size); + struct deref_node *node = rzalloc_size(shader, size); node->type = type; node->parent = parent; node->deref = NULL; @@ -469,7 +469,7 @@ lower_copies_to_load_store(struct deref_node *node, set_foreach(node->copies, copy_entry) { nir_intrinsic_instr *copy = (void *)copy_entry->key; - nir_lower_var_copy_instr(copy, state->mem_ctx); + nir_lower_var_copy_instr(copy, state->shader); for (unsigned i = 0; i < 2; ++i) { struct deref_node *arg_node = @@ -527,7 +527,7 @@ get_const_initializer_load(const nir_deref_var *deref, } nir_load_const_instr *load = - nir_load_const_instr_create(state->mem_ctx, + nir_load_const_instr_create(state->shader, glsl_get_vector_elements(tail->type)); matrix_offset *= load->def.num_components; @@ -618,7 +618,7 @@ get_ssa_def_for_block(struct deref_node *node, nir_block *block, * given block. This means that we need to add an undef and use that. */ nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->mem_ctx, + nir_ssa_undef_instr_create(state->shader, glsl_get_vector_elements(node->type)); nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr); def_stack_push(node, &undef->def, state); @@ -698,7 +698,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) * should result in an undefined value. 
*/ nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->mem_ctx, + nir_ssa_undef_instr_create(state->shader, intrin->num_components); nir_instr_insert_before(&intrin->instr, &undef->instr); @@ -706,14 +706,14 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&undef->def), - state->mem_ctx); + state->shader); continue; } if (!node->lower_to_ssa) continue; - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src.is_ssa = true; mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); @@ -731,7 +731,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(&mov->dest.dest.ssa), - state->mem_ctx); + state->shader); break; } @@ -754,7 +754,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state) assert(intrin->src[0].is_ssa); - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src.is_ssa = true; mov->src[0].src.ssa = intrin->src[0].ssa; @@ -891,7 +891,7 @@ insert_phi_nodes(struct lower_variables_state *state) continue; if (has_already[next->index] < iter_count) { - nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx); + nir_phi_instr *phi = nir_phi_instr_create(state->shader); nir_ssa_dest_init(&phi->instr, &phi->dest, glsl_get_vector_elements(node->type), NULL); nir_instr_insert_before_block(next, &phi->instr); @@ -942,8 +942,8 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) { struct lower_variables_state state; - state.mem_ctx = ralloc_parent(impl); - state.dead_ctx = ralloc_context(state.mem_ctx); + state.shader = impl->overload->function->shader; + state.dead_ctx = ralloc_context(state.shader); state.impl = impl; state.deref_var_nodes = 
_mesa_hash_table_create(state.dead_ctx, From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir: Move get_const_initializer_load from vars_to_ssa to NIR core Message-ID: <20150423011107.8E95776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7e1d21edbff772f6dfc727e0e09788d87e00e0f5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7e1d21edbff772f6dfc727e0e09788d87e00e0f5 Author: Jason Ekstrand Date: Fri Apr 10 14:46:22 2015 -0700 nir: Move get_const_initializer_load from vars_to_ssa to NIR core Reviewed-by: Connor Abbott --- src/glsl/nir/nir.c | 60 +++++++++++++++++++++++++++++++ src/glsl/nir/nir.h | 3 ++ src/glsl/nir/nir_lower_vars_to_ssa.c | 64 ++-------------------------------- 3 files changed, 65 insertions(+), 62 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index c6e5361..a7ee361 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -589,6 +589,66 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref) return NULL; } +/* Returns a load_const instruction that represents the constant + * initializer for the given deref chain. The caller is responsible for + * ensuring that there actually is a constant initializer. 
+ */ +nir_load_const_instr * +nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) +{ + nir_constant *constant = deref->var->constant_initializer; + assert(constant); + + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = + nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type)); + + matrix_offset *= load->def.num_components; + for (unsigned i = 0; i < load->def.num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.b[matrix_offset + i] ? 
+ NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; +} /** * \name Control flow modification diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 41e2120..98b0ec3 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1520,6 +1520,9 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); +nir_load_const_instr * +nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); + void nir_instr_insert_before(nir_instr *instr, nir_instr *before); void nir_instr_insert_after(nir_instr *instr, nir_instr *after); diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index a844038..00b4fb6 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -489,67 +489,6 @@ lower_copies_to_load_store(struct deref_node *node, return true; } -/* Returns a load_const instruction that represents the constant - * initializer for the given deref chain. The caller is responsible for - * ensuring that there actually is a constant initializer. 
- */ -static nir_load_const_instr * -get_const_initializer_load(const nir_deref_var *deref, - struct lower_variables_state *state) -{ - nir_constant *constant = deref->var->constant_initializer; - const nir_deref *tail = &deref->deref; - unsigned matrix_offset = 0; - while (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *arr = nir_deref_as_array(tail->child); - assert(arr->deref_array_type == nir_deref_array_type_direct); - if (glsl_type_is_matrix(tail->type)) { - assert(arr->deref.child == NULL); - matrix_offset = arr->base_offset; - } else { - constant = constant->elements[arr->base_offset]; - } - break; - } - - case nir_deref_type_struct: { - constant = constant->elements[nir_deref_as_struct(tail->child)->index]; - break; - } - - default: - unreachable("Invalid deref child type"); - } - - tail = tail->child; - } - - nir_load_const_instr *load = - nir_load_const_instr_create(state->shader, - glsl_get_vector_elements(tail->type)); - - matrix_offset *= load->def.num_components; - for (unsigned i = 0; i < load->def.num_components; i++) { - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - load->value.u[i] = constant->value.u[matrix_offset + i]; - break; - case GLSL_TYPE_BOOL: - load->value.u[i] = constant->value.b[matrix_offset + i] ? - NIR_TRUE : NIR_FALSE; - break; - default: - unreachable("Invalid immediate type"); - } - } - - return load; -} - /** Pushes an SSA def onto the def stack for the given node * * Each node is potentially associated with a stack of SSA definitions. 
@@ -987,7 +926,8 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl) progress = true; if (deref->var->constant_initializer) { - nir_load_const_instr *load = get_const_initializer_load(deref, &state); + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state.shader, deref); nir_ssa_def_init(&load->instr, &load->def, glsl_get_vector_elements(node->type), NULL); nir_instr_insert_before_cf_list(&impl->body, &load->instr); From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir: Add a simple growing array data structure Message-ID: <20150423011107.A105976102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f50f59d3d92061e69713b072aade66195dececd8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f50f59d3d92061e69713b072aade66195dececd8 Author: Jason Ekstrand Date: Fri Apr 10 17:06:05 2015 -0700 nir: Add a simple growing array data structure Reviewed-by: Connor Abbott --- src/glsl/nir/nir_array.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/glsl/nir/nir_array.h b/src/glsl/nir/nir_array.h new file mode 100644 index 0000000..1db4e8c --- /dev/null +++ b/src/glsl/nir/nir_array.h @@ -0,0 +1,96 @@ +/* + * Copyright © 
2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason at jlekstrand.net) + * + */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + void *mem_ctx; + size_t size; + size_t alloc; + void *data; +} nir_array; + +static inline void +nir_array_init(nir_array *arr, void *mem_ctx) +{ + arr->mem_ctx = mem_ctx; + arr->size = 0; + arr->alloc = 0; + arr->data = NULL; +} + +static inline void +nir_array_fini(nir_array *arr) +{ + if (arr->mem_ctx) + ralloc_free(arr->data); + else + free(arr->data); +} + +#define NIR_ARRAY_INITIAL_SIZE 64 + +/* Increments the size of the array by the given amount and returns a + * pointer to the beginning of the newly added space. 
+ */ +static inline void * +nir_array_grow(nir_array *arr, size_t additional) +{ + size_t new_size = arr->size + additional; + if (new_size > arr->alloc) { + if (arr->alloc == 0) + arr->alloc = NIR_ARRAY_INITIAL_SIZE; + + while (new_size > arr->alloc) + arr->alloc *= 2; + + if (arr->mem_ctx) + arr->data = reralloc_size(arr->mem_ctx, arr->data, arr->alloc); + else + arr->data = realloc(arr->data, arr->alloc); + } + + void *ptr = (void *)((char *)arr->data + arr->size); + arr->size = new_size; + + return ptr; +} + +#define nir_array_add(arr, type, elem) \ + *(type *)nir_array_grow(arr, sizeof(type)) = (elem) + +#define nir_array_foreach(arr, type, elem) \ + for (type *elem = (type *)(arr)->data; \ + elem < (type *)((char *)(arr)->data + (arr)->size); elem++) + +#ifdef __cplusplus +} /* extern "C" */ +#endif From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/locals_to_regs: Pass around the nir_shader rather than a void * mem_ctx Message-ID: <20150423011107.AB15076102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4e9b3765947f2c98d618082cd449db2e319bd887 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e9b3765947f2c98d618082cd449db2e319bd887 Author: Jason Ekstrand Date: Fri Apr 10 14:50:06 2015 -0700 nir/locals_to_regs: Pass around the nir_shader rather than a void * mem_ctx Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_locals_to_regs.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c index 8c1977a..6ad8ab0 100644 --- a/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/src/glsl/nir/nir_lower_locals_to_regs.c @@ -28,7 +28,7 @@ #include "nir.h" struct locals_to_regs_state { - void *mem_ctx; + nir_shader *shader; nir_function_impl *impl; /* A hash table mapping derefs to registers 
*/ @@ -142,11 +142,11 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, if (src.reg.indirect) { nir_load_const_instr *load_const = - nir_load_const_instr_create(state->mem_ctx, 1); + nir_load_const_instr_create(state->shader, 1); load_const->value.u[0] = glsl_get_length(parent_type); nir_instr_insert_before(instr, &load_const->instr); - nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul); + nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul); mul->src[0].src = *src.reg.indirect; mul->src[1].src.is_ssa = true; mul->src[1].src.ssa = &load_const->def; @@ -160,15 +160,15 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, if (deref_array->deref_array_type == nir_deref_array_type_indirect) { if (src.reg.indirect == NULL) { - src.reg.indirect = ralloc(state->mem_ctx, nir_src); + src.reg.indirect = ralloc(state->shader, nir_src); nir_src_copy(src.reg.indirect, &deref_array->indirect, - state->mem_ctx); + state->shader); } else { - nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx, + nir_alu_instr *add = nir_alu_instr_create(state->shader, nir_op_iadd); add->src[0].src = *src.reg.indirect; nir_src_copy(&add->src[1].src, &deref_array->indirect, - state->mem_ctx); + state->shader); add->dest.write_mask = 1; nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); nir_instr_insert_before(instr, &add->instr); @@ -198,7 +198,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) if (intrin->variables[0]->var->data.mode != nir_var_local) continue; - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); mov->src[0].src = get_deref_reg_src(intrin->variables[0], &intrin->instr, state); mov->dest.write_mask = (1 << intrin->num_components) - 1; @@ -207,9 +207,9 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) intrin->num_components, NULL); nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 
nir_src_for_ssa(&mov->dest.dest.ssa), - state->mem_ctx); + state->shader); } else { - nir_dest_copy(&mov->dest.dest, &intrin->dest, state->mem_ctx); + nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader); } nir_instr_insert_before(&intrin->instr, &mov->instr); @@ -224,8 +224,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) nir_src reg_src = get_deref_reg_src(intrin->variables[0], &intrin->instr, state); - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov); - nir_src_copy(&mov->src[0].src, &intrin->src[0], state->mem_ctx); + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader); mov->dest.write_mask = (1 << intrin->num_components) - 1; mov->dest.dest.is_ssa = false; mov->dest.dest.reg.reg = reg_src.reg.reg; @@ -255,7 +255,7 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl) { struct locals_to_regs_state state; - state.mem_ctx = ralloc_parent(impl); + state.shader = impl->overload->function->shader; state.impl = impl; state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/locals_to_regs: Handle indirect accesses of length-1 arrays Message-ID: <20150423011107.BD0CB76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d61bd972d861d9246fe7f9de71158aa79368bb79 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d61bd972d861d9246fe7f9de71158aa79368bb79 Author: Jason Ekstrand Date: Fri Apr 10 17:38:17 2015 -0700 nir/locals_to_regs: Handle indirect accesses of length-1 arrays Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_locals_to_regs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c index 48459f7..bc6a3d3 
100644 --- a/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/src/glsl/nir/nir_lower_locals_to_regs.c @@ -135,6 +135,14 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, src.reg.base_offset = 0; src.reg.indirect = NULL; + /* It is possible for a user to create a shader that has an array with a + * single element and then proceed to access it indirectly. Indirectly + * accessing a non-array register is not allowed in NIR. In order to + * handle this case we just convert it to a direct reference. + */ + if (src.reg.reg->num_array_elems == 0) + return src; + nir_deref *tail = &deref->deref; while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/types: Make glsl_get_length smarter Message-ID: <20150423011107.9815776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8b900e74058dd48368511780a488ccb7a645c64f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8b900e74058dd48368511780a488ccb7a645c64f Author: Jason Ekstrand Date: Fri Apr 10 16:16:02 2015 -0700 nir/types: Make glsl_get_length smarter Previously, this function returned the number of elements for structures and arrays and 0 for everything else. In NIR, this is almost never what you want because we also treat matrices as arrays so you have to special-case constantly. This commit makes glsl_get_length treat matrices as an array of columns by returning the number of columns instead of 0. This also fixes a bug in locals_to_regs caused by not checking for the matrix case in one place. v2: Only special-case for matrices and return a length of 0 for vectors as we did before. This was needed to not break the TGSI-based drivers and doesn't really affect NIR at the moment. 
Reviewed-by: Connor Abbott Tested-by: Rob Clark --- src/glsl/nir/nir_lower_locals_to_regs.c | 11 ++--------- src/glsl/nir/nir_lower_var_copies.c | 24 ++---------------------- src/glsl/nir/nir_lower_vars_to_ssa.c | 26 +++----------------------- src/glsl/nir/nir_types.cpp | 2 +- 4 files changed, 8 insertions(+), 55 deletions(-) diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c index 8c5df7b..8c1977a 100644 --- a/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/src/glsl/nir/nir_lower_locals_to_regs.c @@ -100,15 +100,8 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) unsigned array_size = 1; nir_deref *tail = &deref->deref; while (tail->child) { - if (tail->child->deref_type == nir_deref_type_array) { - /* Multiply by the parent's type. */ - if (glsl_type_is_matrix(tail->type)) { - array_size *= glsl_get_matrix_columns(tail->type); - } else { - assert(glsl_get_length(tail->type) > 0); - array_size *= glsl_get_length(tail->type); - } - } + if (tail->child->deref_type == nir_deref_type_array) + array_size *= glsl_get_length(tail->type); tail = tail->child; } diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c index 58389a7..2167290 100644 --- a/src/glsl/nir/nir_lower_var_copies.c +++ b/src/glsl/nir/nir_lower_var_copies.c @@ -64,26 +64,6 @@ get_deref_tail(nir_deref *deref) return deref; } -static int -type_get_length(const struct glsl_type *type) -{ - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - return glsl_get_length(type); - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) - return glsl_get_matrix_columns(type); - else - return glsl_get_vector_elements(type); - default: - unreachable("Invalid deref base type"); - } -} - /* This function recursively walks the given deref chain and replaces the * given copy instruction with an equivalent sequence 
load/store * operations. @@ -121,9 +101,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr, nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child); nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child); - unsigned length = type_get_length(src_arr_parent->type); + unsigned length = glsl_get_length(src_arr_parent->type); /* The wildcards should represent the same number of elements */ - assert(length == type_get_length(dest_arr_parent->type)); + assert(length == glsl_get_length(dest_arr_parent->type)); assert(length > 0); /* Walk over all of the elements that this wildcard refers to and diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index 00b4fb6..bb60f46 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c @@ -90,32 +90,12 @@ struct lower_variables_state { struct hash_table *phi_table; }; -static int -type_get_length(const struct glsl_type *type) -{ - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - return glsl_get_length(type); - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(type)) - return glsl_get_matrix_columns(type); - else - return glsl_get_vector_elements(type); - default: - unreachable("Invalid deref base type"); - } -} - static struct deref_node * deref_node_create(struct deref_node *parent, const struct glsl_type *type, nir_shader *shader) { size_t size = sizeof(struct deref_node) + - type_get_length(type) * sizeof(struct deref_node *); + glsl_get_length(type) * sizeof(struct deref_node *); struct deref_node *node = rzalloc_size(shader, size); node->type = type; @@ -165,7 +145,7 @@ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); - assert(deref_struct->index < type_get_length(node->type)); + assert(deref_struct->index < 
glsl_get_length(node->type)); if (node->children[deref_struct->index] == NULL) node->children[deref_struct->index] = @@ -184,7 +164,7 @@ get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) * out-of-bounds offset. We need to handle this at least * somewhat gracefully. */ - if (arr->base_offset >= type_get_length(node->type)) + if (arr->base_offset >= glsl_get_length(node->type)) return NULL; if (node->children[arr->base_offset] == NULL) diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp index f0d0b46..62176f5 100644 --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@ -103,7 +103,7 @@ glsl_get_matrix_columns(const struct glsl_type *type) unsigned glsl_get_length(const struct glsl_type *type) { - return type->length; + return type->is_matrix() ? type->matrix_columns : type->length; } const char * From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/locals_to_regs: Initialize registers with constant initializers Message-ID: <20150423011107.B400A76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 06f3c98b9da35b5f5c02bd30599fbde57a19520e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=06f3c98b9da35b5f5c02bd30599fbde57a19520e Author: Jason Ekstrand Date: Fri Apr 10 15:39:34 2015 -0700 nir/locals_to_regs: Initialize registers with constant initializers Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_locals_to_regs.c | 103 +++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c index 6ad8ab0..48459f7 100644 --- a/src/glsl/nir/nir_lower_locals_to_regs.c +++ b/src/glsl/nir/nir_lower_locals_to_regs.c @@ -26,6 +26,7 @@ */ #include "nir.h" +#include "nir_array.h" struct locals_to_regs_state { nir_shader *shader; @@ -33,6 +34,12 @@ struct 
locals_to_regs_state { /* A hash table mapping derefs to registers */ struct hash_table *regs_table; + + /* A growing array of derefs that we have encountered. There is exactly + * one element of this array per element in the hash table. This is + * used to make adding register initialization code deterministic. + */ + nir_array derefs_array; }; /* The following two functions implement a hash and equality check for @@ -112,6 +119,7 @@ get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) reg->num_array_elems = array_size > 1 ? array_size : 0; _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg); + nir_array_add(&state->derefs_array, nir_deref_var *, deref); return reg; } @@ -250,6 +258,82 @@ lower_locals_to_regs_block(nir_block *block, void *void_state) return true; } +static nir_block * +compute_reg_usedef_lca(nir_register *reg) +{ + struct set_entry *entry; + nir_block *lca = NULL; + + set_foreach(reg->defs, entry) + lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + + set_foreach(reg->uses, entry) + lca = nir_dominance_lca(lca, ((nir_instr *)entry->key)->block); + + set_foreach(reg->if_uses, entry) { + nir_if *if_stmt = (nir_if *)entry->key; + nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + assert(prev_node->type == nir_cf_node_block); + lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); + } + + return lca; +} + +static void +insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail, + nir_block *block, + struct locals_to_regs_state *state) +{ + if (deref_tail->child) { + switch (deref_tail->child->deref_type) { + case nir_deref_type_array: { + unsigned array_elems = glsl_get_length(deref_tail->type); + + nir_deref_array arr_deref; + arr_deref.deref = *deref_tail->child; + arr_deref.deref_array_type = nir_deref_array_type_direct; + + nir_deref *old_child = deref_tail->child; + deref_tail->child = &arr_deref.deref; + for (unsigned i = 0; i < array_elems; i++) { 
+ arr_deref.base_offset = i; + insert_constant_initializer(deref_head, &arr_deref.deref, + block, state); + } + deref_tail->child = old_child; + return; + } + + case nir_deref_type_struct: + insert_constant_initializer(deref_head, deref_tail->child, + block, state); + return; + + default: + unreachable("Invalid deref child type"); + } + } + + assert(deref_tail->child == NULL); + + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state->shader, deref_head); + nir_instr_insert_before_block(block, &load->instr); + + nir_src reg_src = get_deref_reg_src(deref_head, &load->instr, state); + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + mov->src[0].src = nir_src_for_ssa(&load->def); + mov->dest.write_mask = (1 << load->def.num_components) - 1; + mov->dest.dest.is_ssa = false; + mov->dest.dest.reg.reg = reg_src.reg.reg; + mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; + mov->dest.dest.reg.indirect = reg_src.reg.indirect; + + nir_instr_insert_after(&load->instr, &mov->instr); +} + static void nir_lower_locals_to_regs_impl(nir_function_impl *impl) { @@ -258,12 +342,31 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl) state.shader = impl->overload->function->shader; state.impl = impl; state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); + nir_array_init(&state.derefs_array, NULL); + + nir_metadata_require(impl, nir_metadata_dominance); nir_foreach_block(impl, lower_locals_to_regs_block, &state); + nir_array_foreach(&state.derefs_array, nir_deref_var *, deref_ptr) { + nir_deref_var *deref = *deref_ptr; + struct hash_entry *deref_entry = + _mesa_hash_table_search(state.regs_table, deref); + assert(deref_entry && deref_entry->key == deref); + nir_register *reg = (nir_register *)deref_entry->data; + + if (deref->var->constant_initializer == NULL) + continue; + + nir_block *usedef_lca = compute_reg_usedef_lca(reg); + + insert_constant_initializer(deref, &deref->deref, usedef_lca, &state); + } 
+ nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); + nir_array_fini(&state.derefs_array); _mesa_hash_table_destroy(state.regs_table, NULL); } From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/lower_source_mods: Don't propagate register sources Message-ID: <20150423011107.CF05576102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 125574d1effcb3e3eda93f2b2975bc6cc606df3e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=125574d1effcb3e3eda93f2b2975bc6cc606df3e Author: Jason Ekstrand Date: Mon Apr 13 14:13:16 2015 -0700 nir/lower_source_mods: Don't propagate register sources The nir_lower_source_mods pass does a weak form of copy propagation to clean up all of the mov-with-negate's that get generated. However, we weren't properly checking that the sources were SSA and so we could end up moving a register read which is not, in general, valid. Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_to_source_mods.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/glsl/nir/nir_lower_to_source_mods.c b/src/glsl/nir/nir_lower_to_source_mods.c index d6bf77f..7b4a0f6 100644 --- a/src/glsl/nir/nir_lower_to_source_mods.c +++ b/src/glsl/nir/nir_lower_to_source_mods.c @@ -67,6 +67,13 @@ nir_lower_to_source_mods_block(nir_block *block, void *state) continue; } + /* We can only do a rewrite if the source we are copying is SSA. + * Otherwise, moving the read might invalidly reorder reads/writes + * on a register. 
+ */ + if (!parent->src[0].src.is_ssa) + continue; + nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); if (alu->src[i].abs) { /* abs trumps both neg and abs, do nothing */ From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir: Rewrite instr_rewrite_src Message-ID: <20150423011107.C6A1876102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 296131f4674ee979a8cffe56fc61bbd42110982e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=296131f4674ee979a8cffe56fc61bbd42110982e Author: Jason Ekstrand Date: Mon Apr 13 14:02:21 2015 -0700 nir: Rewrite instr_rewrite_src The old code wasn't correctly handling the case where the new value of the source contains an indirect. Reviewed-by: Connor Abbott --- src/glsl/nir/nir.c | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index a7ee361..4cc074b 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1860,33 +1860,37 @@ src_does_not_use_reg(nir_src *src, void *void_reg) void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) { - if (src->is_ssa) { - nir_ssa_def *old_ssa = src->ssa; - *src = new_src; - if (old_ssa && nir_foreach_src(instr, src_does_not_use_def, old_ssa)) { - struct set_entry *entry = _mesa_set_search(old_ssa->uses, instr); - assert(entry); - _mesa_set_remove(old_ssa->uses, entry); - } - } else { - if (src->reg.indirect) - nir_instr_rewrite_src(instr, src->reg.indirect, new_src); - - nir_register *old_reg = src->reg.reg; - *src = new_src; - if (old_reg && nir_foreach_src(instr, src_does_not_use_reg, old_reg)) { - struct set_entry *entry = _mesa_set_search(old_reg->uses, instr); - assert(entry); - _mesa_set_remove(old_reg->uses, entry); + nir_src old_src = *src; + *src = new_src; + + for (nir_src *iter_src = 
&old_src; iter_src; + iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { + if (iter_src->is_ssa) { + nir_ssa_def *ssa = iter_src->ssa; + if (ssa && nir_foreach_src(instr, src_does_not_use_def, ssa)) { + struct set_entry *entry = _mesa_set_search(ssa->uses, instr); + assert(entry); + _mesa_set_remove(ssa->uses, entry); + } + } else { + nir_register *reg = iter_src->reg.reg; + if (reg && nir_foreach_src(instr, src_does_not_use_reg, reg)) { + struct set_entry *entry = _mesa_set_search(reg->uses, instr); + assert(entry); + _mesa_set_remove(reg->uses, entry); + } } } - if (new_src.is_ssa) { - if (new_src.ssa) - _mesa_set_add(new_src.ssa->uses, instr); - } else { - if (new_src.reg.reg) - _mesa_set_add(new_src.reg.reg->uses, instr); + for (nir_src *iter_src = &new_src; iter_src; + iter_src = iter_src->is_ssa ? NULL : iter_src->reg.indirect) { + if (iter_src->is_ssa) { + if (iter_src->ssa) + _mesa_set_add(iter_src->ssa->uses, instr); + } else { + if (iter_src->reg.reg) + _mesa_set_add(iter_src->reg.reg->uses, instr); + } } } From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): nir/lower_vars_to_ssa: Actually look for indirects when determining aliasing Message-ID: <20150423011107.4E28776102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 73cc76362dd93a2b8b35583cc12cc3a61a61ea83 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=73cc76362dd93a2b8b35583cc12cc3a61a61ea83 Author: Jason Ekstrand Date: Thu Apr 9 20:45:45 2015 -0700 nir/lower_vars_to_ssa: Actually look for indirects when determining aliasing Reviewed-by: Connor Abbott --- src/glsl/nir/nir_lower_vars_to_ssa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c index 2ca74d7..8b7261c 100644 --- a/src/glsl/nir/nir_lower_vars_to_ssa.c +++ b/src/glsl/nir/nir_lower_vars_to_ssa.c 
@@ -317,6 +317,10 @@ deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, if (arr->deref_array_type == nir_deref_array_type_indirect) return true; + /* If there is an indirect at this level, we're aliased. */ + if (node->indirect) + return true; + assert(arr->deref_array_type == nir_deref_array_type_direct); if (node->children[arr->base_offset] && From jekstrand at kemper.freedesktop.org Thu Apr 23 01:11:07 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Wed, 22 Apr 2015 18:11:07 -0700 (PDT) Subject: Mesa (master): i965/nir: Use the correct offsets when handling register indirects Message-ID: <20150423011107.7826E76102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c68364ac341d5fbbc5b6dcf74812a776359c0168 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c68364ac341d5fbbc5b6dcf74812a776359c0168 Author: Jason Ekstrand Date: Fri Apr 10 11:52:08 2015 -0700 i965/nir: Use the correct offsets when handling register indirects Reviewed-by: Connor Abbott --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 0dcbc3b..9564764 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1157,6 +1157,28 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) } } +static fs_reg +fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, + unsigned base_offset, nir_src *indirect) +{ + fs_reg reg; + if (nir_reg->is_global) + reg = v->nir_globals[nir_reg->index]; + else + reg = v->nir_locals[nir_reg->index]; + + reg = offset(reg, base_offset * nir_reg->num_components); + if (indirect) { + int multiplier = nir_reg->num_components * (v->dispatch_width / 8); + + reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type)); + v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect), + fs_reg(multiplier))); + } + + 
return reg; +} + fs_reg fs_visitor::get_nir_src(nir_src src) { @@ -1171,44 +1193,22 @@ fs_visitor::get_nir_src(nir_src src) return reg; } else { - fs_reg reg; - if (src.reg.reg->is_global) - reg = nir_globals[src.reg.reg->index]; - else - reg = nir_locals[src.reg.reg->index]; + fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, + src.reg.indirect); /* to avoid floating-point denorm flushing problems, set the type by * default to D - instructions that need floating point semantics will set * this to F if they need to */ - reg = retype(offset(reg, src.reg.base_offset), BRW_REGISTER_TYPE_D); - if (src.reg.indirect) { - reg.reladdr = new(mem_ctx) fs_reg(); - *reg.reladdr = retype(get_nir_src(*src.reg.indirect), - BRW_REGISTER_TYPE_D); - } - - return reg; + return retype(reg, BRW_REGISTER_TYPE_D); } } fs_reg fs_visitor::get_nir_dest(nir_dest dest) { - fs_reg reg; - if (dest.reg.reg->is_global) - reg = nir_globals[dest.reg.reg->index]; - else - reg = nir_locals[dest.reg.reg->index]; - - reg = offset(reg, dest.reg.base_offset); - if (dest.reg.indirect) { - reg.reladdr = new(mem_ctx) fs_reg(); - *reg.reladdr = retype(get_nir_src(*dest.reg.indirect), - BRW_REGISTER_TYPE_D); - } - - return reg; + return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, + dest.reg.indirect); } void From evelikov at kemper.freedesktop.org Thu Apr 23 11:42:25 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Thu, 23 Apr 2015 04:42:25 -0700 (PDT) Subject: Mesa (10.5): 21 new commits Message-ID: <20150423114225.BB09F76102@kemper.freedesktop.org> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e2dd5546517dd8d3b6790cea3c74c038629256b4 Author: Brian Paul Date: Thu Apr 16 15:29:18 2015 -0600 glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow This should be more efficient than the previous snprintf() solution. 
But more importantly, it avoids a buffer overflow bug that could result in crashes or unpredictable results when processing very large interface blocks. For the app in question, key->length = 103 for some interfaces. The check if size >= sizeof(hash_key) was insufficient to prevent overflows of the hash_key[128] array because it didn't account for the terminating zero. In this case, this caused the call to hash_table_string_hash() to return different results for identical inputs, and then shader linking failed. This new solution also takes all structure fields into account instead of just the first 15 when sizeof(pointer)==8. Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Ian Romanick (cherry picked from commit 31667e6237d30188d0b29e17f5b9892f10c0d83a) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0f5ec7250d3e19eda1121f1bf3f340aedcc26dc5 Author: Emil Velikov Date: Sat Mar 28 18:23:01 2015 +0000 android: mesa: fix the path of the SSE4_1 optimisations Commit dd6f641303c(mesa: Build with subdir-objects.) removed the SRCDIR variable, but forgot to update all references of it. v2: Fix path - must be relative to LOCAL_PATH. (Chih-Wei) Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 669cfc267a1102ff903b3e562f9aa45a410e0312) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2785a23e694df2213c9afe6d6f062cc366d24f1 Author: Mauro Rossi Date: Fri Mar 27 22:25:45 2015 +0000 android: add initial NIR build Required by the i965 driver. v2: - Split out the nir_builder_opcodes.h rules. - Do not unconditionally hide the python command - use $(hide) - Use LOCAL_EXPORT_C_INCLUDE_DIRS to manage includes for the generated sources.
Cc: "10.5" [Emil Velikov: Split from a larger commit, v2] Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 06619749a11651a50e353168c7c793082820585d) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2ad5a068faa9f9cbd789d3a988a14f973a5f8720 Author: Emil Velikov Date: Fri Mar 27 21:39:15 2015 +0000 android: dri: link against libmesa_util The dri modules depend on symbols provided by it. Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 618885f71fcacb3d68bf37fa23be36830d4178d2) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dfbc95b7664d3f52179870c016477d0ea950c91 Author: Emil Velikov Date: Sat Mar 28 01:11:54 2015 +0000 android: add $(mesa_top)/src/mesa/main to the includes list Required by the format_{un,}pack rework. Otherwise the build will fail to locate the respective headers - format_{un,}pack.h Cc: "10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 0afbd2df0485cd480979d9f4cdae00262d1a3c62) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=215f28c695f8902a609ff01e1cbfbe2fa8783adc Author: Emil Velikov Date: Fri Mar 27 20:10:35 2015 +0000 android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines All of those are available on gcc 4.5 and later with the current android build using gcc 4.7. Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 39a175e0c792f569dfe73de1b4d01b0caea43a01) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=95633d09e82116a6e502ac53d4563bca1bcc9e8f Author: Emil Velikov Date: Fri Mar 27 18:57:46 2015 +0000 android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H Otherwise we'll fail to find the drm.h header. 
Cc: "10.4 10.5" Signed-off-by: Emil Velikov (cherry picked from commit 8d90bfb724f89b04d703f869362cf2fc2a3d7567) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33c6a4c018976a673df7e94df5d43ea40369f878 Author: Emil Velikov Date: Fri Mar 27 18:36:10 2015 +0000 android: egl: add libsync_cflags to the build ... via local_shared_libraries. Otherwise the sync/sync.h header won't be found. Note: 10.5 and earlier will need similar change in st/egl. v2: Append the library to the local_shared_libraries list. (Chih-Wei) Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 2d06791f6f9e8ab37109be52e63d247bbbcb42d4) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=548fc14d362fd4ea49e0742aaaf1de04184f93d0 Author: Mauro Rossi Date: Fri Mar 27 18:20:53 2015 +0000 android: mesa: generate the format_{un,}pack.[ch] sources Missed out with commit e1fdcddafe9(mesa: Autogenerate format_unpack.c) v2: Conditionally print the python commands - s/@/$(hide) / (Chih-Wei) Cc: "10.5" [Emil Velikov: Split out from a larger commit.] Signed-off-by: Emil Velikov (cherry picked from commit 5f7081eb90bc5a25f0740314fa22e04d189238ca) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c614fef0929e44fc627d49a4c16b623c630b2b71 Author: Emil Velikov Date: Fri Mar 27 16:13:50 2015 +0000 android: add $(mesa_top)/src include to the whole of mesa Many parts of mesa already have the include with others depending on it but it's missing. Add it once at the top makefile and be done with it. Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Chih-Wei Huang (cherry picked from commit 6fb801786604c270fae99c3d665dcebaa0bff3a6) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ba18d75d66e3694923f04435670aef2db567798 Author: Emil Velikov Date: Fri Mar 27 11:40:42 2015 +0000 android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS ... to manage the LIBDRM*_CFLAGS.
The former is the recommended approach by the Android build system developers while the latter has been deprecated for quite some time. Cc: "10.4 10.5" Signed-off-by: Emil Velikov (cherry picked from commit 86919352e3da1c80409fdcb67c36f29a9687b7a9) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=597d85c6b761028e00d9a75405afcba7612f9d58 Author: Kenneth Graunke Date: Fri Apr 10 10:24:33 2015 -0700 drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader). Appears to fix shader compilation. Tested by starting the client, dragging the "quality and speed" slider back and forth, and watching the console output - instead of piles of "shader failed to compile", the CPU seems to be busy compiling shaders. I haven't actually tried to play. Signed-off-by: Kenneth Graunke Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=69226 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71591 Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit 00bf7d2e9cd60dbd82d25b459c448e11c545a89a) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef0e335c1422b8a394073d89a781ed5847da0eb1 Author: Marek Olšák Date: Sat Apr 11 14:55:26 2015 +0200 glsl_to_tgsi: don't use a potentially-undefined immediate for ir_query_levels Cc: 10.4 10.5 Reviewed-by: Brian Paul (cherry picked from commit dcc74d47c40bf117f2dfaa359f9de7faef2c2200) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8966a07d94bb3e49db31b92ee01e339a908bd50 Author: Marek Olšák Date: Sat Apr 11 13:49:38 2015 +0200 glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms This fixes piglit shaders@glsl-fs-uniform-array-loop-unroll with immediate shader compilation - it's a compiler test, so it has never been translated to TGSI before.
Cc: 10.4 10.5 Reviewed-by: Brian Paul (cherry picked from commit 14c5bc3b9a6b03a8e42ef79da66d8b81b239cf96) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee63b31594a5919f93240fa209c0ffa74ec5b081 Author: Kristian Høgsberg Date: Tue Apr 14 15:02:18 2015 +0000 i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders The ir_tex opcode turns into a sample or sample_c message, which will try to compute derivatives to determine the lod. This produces garbage for non-fragment shaders where the sample coordinates don't correspond to subspans. We fix this by rewriting the opcode from ir_tex to ir_txl and setting the lod to 0. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89457 Cc: "10.5" Signed-off-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke Reviewed-by: Ian Romanick (cherry picked from commit 993a6288f72fa98932df7cdb6f64d9dd645e670d) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ebb42a6b2c21673aeb7ba09327043d278b0fe78 Author: Ian Romanick Date: Tue Apr 14 08:40:22 2015 -0700 nir: Fix typo in "ushr by 0" algebraic replacement Signed-off-by: Ian Romanick Reviewed-by: Jordan Justen Reviewed-by: Jason Ekstrand Reviewed-by: Connor Abbott Cc: "10.5" (cherry picked from commit bc672e261c5f7ff56cd2b8f6b518ebfdc0163bb7) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a968edae6cdf5340785e94a78742df9b49bf7071 Author: Kenneth Graunke Date: Sat Apr 11 02:21:48 2015 -0700 i965: Fix software primitive restart with indirect draws. new_prim was declared as a stack variable within a nested scope; we tried to retain a pointer to that data beyond the scope, which is bogus. GCC with -O1 eliminated most of the code that set new_prim's fields. Move the declaration to fix the bug. v2: Also fix new_ib (thanks to Matt Turner and Ben Widawsky).
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81025 Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Ben Widawsky Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit 406df68736a213f17f21a38a7c2da4ea15acd053) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f44e518364876dbc210370edd2a8b7b0890e8aab Author: Dave Airlie Date: Wed Apr 8 10:00:27 2015 +1000 st/mesa: align cube map arrays layers We create textures internally for texsubimage, and we use the values from sub image to create a new texture, however we don't align these to valid sizes, and cube map arrays must have an array size aligned to 6. This fixes texsubimage cube_map_array on CAYMAN at least, (it was causing GPU hang and bad values), it probably also fixes it on radeonsi and evergreen. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89957 Tested-by: Tom Stellard Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie (cherry picked from commit cc5860e40787b3afe36856674f028e830685271b) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2a6b01082e45fb32c607cfc64cd780185be62e3 Author: Dave Airlie Date: Wed Apr 8 10:59:20 2015 +1000 st/mesa: convert sub image for cube map arrays to 2d arrays for upload Since we can subimage upload a number of cube map array layers, that aren't a complete cube map array, we should specify things as a 2D array and blit from that. Suggested by Ilia Mirkin as an alternate fix for texsubimage cube map array issues. seems to work just as well. Cc: mesa-stable at lists.freedesktop.org Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie (cherry picked from commit 5ed79312ed99f3b141c35569b9767f82f5ba0a93) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=abdae1c4d1af084aca56c89a20db85c867ce412d Author: Mathias Froehlich Date: Sun Apr 12 18:23:58 2015 +0200 i965: Flush batchbuffer containing the query on glQueryCounter.
This change fixes a regression with timer queries introduced with commit 3eb6258. There the pending batchbuffer is flushed only if glEndQuery is executed. This present change adds such a flush to glQueryCounter which also schedules a value query just like glEndQuery does. The patch fixes GPU timer queries going mad from within osgviewer. Reviewed-by: Kenneth Graunke Signed-off-by: Mathias Froehlich Cc: mesa-stable at lists.freedesktop.org (cherry picked from commit 1e1d5456ba3dff82301ad4bbdde2fb6e2f562fe3) URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f73954e0a799fd9d2e64d62e2ee8c999a7c18126 Author: Emil Velikov Date: Wed Apr 22 16:10:47 2015 +0100 radeonsi: remove unused si_dump_key() Accidentally added with commit 64d0f0e3b24(radeonsi: Cache LLVMTargetMachineRef in context instead of in screen) Reported-by: Michel Dänzer Signed-off-by: Emil Velikov From mareko at kemper.freedesktop.org Thu Apr 23 14:36:38 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 23 Apr 2015 07:36:38 -0700 (PDT) Subject: Mesa (master): gallium/radeon: don't crash when getting out-of-bounds TEMP references Message-ID: <20150423143638.EE61876103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ecc7f2ed910345c4dd0d02ac9c72d69f10ac5572 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecc7f2ed910345c4dd0d02ac9c72d69f10ac5572 Author: Marek Olšák Date: Thu Mar 19 12:14:08 2015 +0100 gallium/radeon: don't crash when getting out-of-bounds TEMP references Reviewed-by: Tom Stellard --- src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 18afbcb..20e506b 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -191,6 +191,8 @@ emit_fetch( break; case TGSI_FILE_TEMPORARY: + if (reg->Register.Index >= ctx->temps_count) +
return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); if (uses_temp_indirect_addressing(bld_base)) { ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); break; @@ -395,6 +397,8 @@ emit_store( break; case TGSI_FILE_TEMPORARY: + if (range.First + i >= ctx->temps_count) + continue; if (uses_temp_indirect_addressing(bld_base)) temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index); else @@ -416,6 +420,8 @@ emit_store( break; case TGSI_FILE_TEMPORARY: + if (reg->Register.Index >= ctx->temps_count) + continue; if (uses_temp_indirect_addressing(bld_base)) { temp_ptr = NULL; break; From sroland at kemper.freedesktop.org Thu Apr 23 16:15:16 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Thu, 23 Apr 2015 09:15:16 -0700 (PDT) Subject: Mesa (master): draw: fix prim ids when there's no gs Message-ID: <20150423161516.5C75D76103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f2a7fd9943fcb7d3de3bc2b21907e0a157b88e96 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2a7fd9943fcb7d3de3bc2b21907e0a157b88e96 Author: Roland Scheidegger Date: Thu Apr 23 18:13:32 2015 +0200 draw: fix prim ids when there's no gs We were resetting the prim id count for each run of the prim assembler, hence this only worked when the draw calls were very small (the exact limit depending on the vertex size), since larger draw calls get split up. So, do the same as we do already if there's a gs, reset it to zero explicitly for every new instance (this possibly could use the same variable but that isn't doable without some heavy refactoring and I'm not sure it makes sense). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90130. 
Reviewed-by: Jose Fonseca CC: --- src/gallium/auxiliary/draw/draw_context.c | 1 + src/gallium/auxiliary/draw/draw_prim_assembler.c | 13 +++++++++++-- src/gallium/auxiliary/draw/draw_prim_assembler.h | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 04cf5b7..ee009c1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -182,6 +182,7 @@ boolean draw_init(struct draw_context *draw) void draw_new_instance(struct draw_context *draw) { draw_geometry_shader_new_instance(draw->gs.geometry_shader); + draw_prim_assembler_new_instance(draw->ia); } diff --git a/src/gallium/auxiliary/draw/draw_prim_assembler.c b/src/gallium/auxiliary/draw/draw_prim_assembler.c index 776c172..7ff705a 100644 --- a/src/gallium/auxiliary/draw/draw_prim_assembler.c +++ b/src/gallium/auxiliary/draw/draw_prim_assembler.c @@ -189,7 +189,6 @@ draw_prim_assembler_prepare_outputs(struct draw_assembler *ia) } else { ia->primid_slot = -1; } - ia->primid = 0; } @@ -233,7 +232,6 @@ draw_prim_assembler_run(struct draw_context *draw, asmblr->input_prims = input_prims; asmblr->input_verts = input_verts; asmblr->needs_primid = needs_primid(asmblr->draw); - asmblr->primid = 0; asmblr->num_prims = 0; output_prims->linear = TRUE; @@ -284,3 +282,14 @@ draw_prim_assembler_destroy(struct draw_assembler *ia) { FREE(ia); } + + +/* + * Called at the very begin of the draw call with a new instance + * Used to reset state that should persist between primitive restart. 
+ */ +void +draw_prim_assembler_new_instance(struct draw_assembler *asmblr) +{ + asmblr->primid = 0; +} diff --git a/src/gallium/auxiliary/draw/draw_prim_assembler.h b/src/gallium/auxiliary/draw/draw_prim_assembler.h index 5ba715b..5ee7317 100644 --- a/src/gallium/auxiliary/draw/draw_prim_assembler.h +++ b/src/gallium/auxiliary/draw/draw_prim_assembler.h @@ -70,5 +70,8 @@ draw_prim_assembler_run(struct draw_context *draw, void draw_prim_assembler_prepare_outputs(struct draw_assembler *ia); +void +draw_prim_assembler_new_instance(struct draw_assembler *ia); + #endif From jrfonseca at kemper.freedesktop.org Thu Apr 23 21:03:00 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Thu, 23 Apr 2015 14:03:00 -0700 (PDT) Subject: Mesa (master): os/os_memory_aligned.h: Handle integer overflow. Message-ID: <20150423210300.5BE8A76103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 525be9c0791e4a447335307b5f225273b9da0cca URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=525be9c0791e4a447335307b5f225273b9da0cca Author: Jose Fonseca Date: Wed Apr 22 20:23:59 2015 +0100 os/os_memory_aligned.h: Handle integer overflow. This code is only used when our memory debugging wrappers are enabled, as we use the C runtime functions directly elsewhere. Tested llvmpipe on Windows w/ memory debugging enabled. VMware PR894263. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/os/os_memory_aligned.h | 28 +++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/os/os_memory_aligned.h b/src/gallium/auxiliary/os/os_memory_aligned.h index 72c5cf6..bb15f24 100644 --- a/src/gallium/auxiliary/os/os_memory_aligned.h +++ b/src/gallium/auxiliary/os/os_memory_aligned.h @@ -39,6 +39,19 @@ #include "pipe/p_compiler.h" + +/** + * Add two size_t values with integer overflow check. 
+ * TODO: leverage __builtin_add_overflow where available + */ +static inline bool +add_overflow_size_t(size_t a, size_t b, size_t *res) +{ + *res = a + b; + return *res < a || *res < b; +} + + /** * Return memory on given byte alignment */ @@ -46,8 +59,21 @@ static INLINE void * os_malloc_aligned(size_t size, size_t alignment) { char *ptr, *buf; + size_t alloc_size; + + /* + * Calculate + * + * alloc_size = size + alignment + sizeof(void *) + * + * while checking for overflow. + */ + if (add_overflow_size_t(size, alignment, &alloc_size) || + add_overflow_size_t(alloc_size, sizeof(void *), &alloc_size)) { + return NULL; + } - ptr = (char *) os_malloc(size + alignment + sizeof(void *)); + ptr = (char *) os_malloc(alloc_size); if (!ptr) return NULL; From jrfonseca at kemper.freedesktop.org Thu Apr 23 21:03:00 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Thu, 23 Apr 2015 14:03:00 -0700 (PDT) Subject: Mesa (master): mapi: Adding missing string.h include. Message-ID: <20150423210300.64DF676104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 29f0f976bd82c04c6c569658c260feaade7394cd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29f0f976bd82c04c6c569658c260feaade7394cd Author: Pali Rohár Date: Thu Apr 23 22:02:07 2015 +0100 mapi: Adding missing string.h include. File glapi_entrypoint.c calls memcpy() function, but does not include string.h header. So compilation can fail at error: implicit declaration of function 'memcpy'.
Signed-off-by: Jose Fonseca --- src/mapi/glapi/glapi_entrypoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mapi/glapi/glapi_entrypoint.c b/src/mapi/glapi/glapi_entrypoint.c index 7facd8a..f1f7f5e 100644 --- a/src/mapi/glapi/glapi_entrypoint.c +++ b/src/mapi/glapi/glapi_entrypoint.c @@ -29,6 +29,8 @@ */ +#include <string.h> + #include "c11/threads.h" #include "glapi/glapi_priv.h" #include "u_execmem.h" From kwg at kemper.freedesktop.org Thu Apr 23 21:19:39 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 23 Apr 2015 14:19:39 -0700 (PDT) Subject: Mesa (master): i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions. Message-ID: <20150423211939.4019F76103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 8c17d53823c77ac1c56b0548e4e54f69a33285f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c17d53823c77ac1c56b0548e4e54f69a33285f1 Author: Kenneth Graunke Date: Wed Apr 15 03:04:33 2015 -0700 i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions. The BLT engine on Gen8+ requires linear surfaces to be cacheline aligned. This restriction was added as part of converting the BLT to use 48-bit addressing. intel_emit_linear_blit needs to handle blits that are not cacheline aligned, as we use it for arbitrary glBufferSubData calls and subrange mappings. Since intel_emit_linear_blit uses 1 byte per pixel, we can use the src/dst pixel X offset field to represent the unaligned portion, and subtract that from the address so it's cacheline aligned.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88521 Signed-off-by: Kenneth Graunke Reviewed-by: Ian Romanick Reviewed-by: Anuj Phogat Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_blit.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 4993f60..98d414c 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -524,6 +524,7 @@ intel_emit_linear_blit(struct brw_context *brw, { struct gl_context *ctx = &brw->ctx; GLuint pitch, height; + int16_t src_x, dst_x; bool ok; /* The pitch given to the GPU must be DWORD aligned, and @@ -532,11 +533,13 @@ intel_emit_linear_blit(struct brw_context *brw, */ pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4); height = (pitch == 0) ? 1 : size / pitch; + src_x = src_offset % 64; + dst_x = dst_offset % 64; ok = intelEmitCopyBlit(brw, 1, - pitch, src_bo, src_offset, I915_TILING_NONE, - pitch, dst_bo, dst_offset, I915_TILING_NONE, - 0, 0, /* src x/y */ - 0, 0, /* dst x/y */ + pitch, src_bo, src_offset - src_x, I915_TILING_NONE, + pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, + src_x, 0, /* src x/y */ + dst_x, 0, /* dst x/y */ pitch, height, /* w, h */ GL_COPY); if (!ok) @@ -544,15 +547,18 @@ intel_emit_linear_blit(struct brw_context *brw, src_offset += pitch * height; dst_offset += pitch * height; + src_x = src_offset % 64; + dst_x = dst_offset % 64; size -= pitch * height; assert (size < (1 << 15)); pitch = ALIGN(size, 4); + if (size != 0) { ok = intelEmitCopyBlit(brw, 1, - pitch, src_bo, src_offset, I915_TILING_NONE, - pitch, dst_bo, dst_offset, I915_TILING_NONE, - 0, 0, /* src x/y */ - 0, 0, /* dst x/y */ + pitch, src_bo, src_offset - src_x, I915_TILING_NONE, + pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, + src_x, 0, /* src x/y */ + dst_x, 0, /* dst x/y */ size, 1, /* w, h */ GL_COPY); if (!ok) From kwg at 
kemper.freedesktop.org Thu Apr 23 21:19:39 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 23 Apr 2015 14:19:39 -0700 (PDT) Subject: Mesa (master): i965: Disallow linear blits that are not cacheline aligned. Message-ID: <20150423211939.4666076101@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5957da1edb9ad504d8af83878c10c3a24e41fc6c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5957da1edb9ad504d8af83878c10c3a24e41fc6c Author: Kenneth Graunke Date: Tue Apr 21 12:32:38 2015 -0700 i965: Disallow linear blits that are not cacheline aligned. The BLT engine on Gen8+ requires linear surfaces to be cacheline aligned. This restriction was added as part of converting the BLT to use 48-bit addressing. The main user, intel_emit_linear_blit, now handles this properly. But we might also have linear miptrees; just refuse to blit those. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88521 Signed-off-by: Kenneth Graunke Reviewed-by: Ian Romanick Reviewed-by: Anuj Phogat Cc: mesa-stable at lists.freedesktop.org --- src/mesa/drivers/dri/i965/intel_blit.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 98d414c..7680a40 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -271,6 +271,20 @@ intel_miptree_blit(struct brw_context *brw, return true; } +static bool +alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling) +{ + /* Tiled buffers must be page-aligned (4K). */ + if (tiling != I915_TILING_NONE) + return (offset & 4095) == 0; + + /* On Gen8+, linear buffers must be cacheline-aligned. 
*/ + if (brw->gen >= 8) + return (offset & 63) == 0; + + return true; +} + /* Copy BitBlt */ bool @@ -296,14 +310,11 @@ intelEmitCopyBlit(struct brw_context *brw, bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; - if (dst_tiling != I915_TILING_NONE) { - if (dst_offset & 4095) - return false; - } - if (src_tiling != I915_TILING_NONE) { - if (src_offset & 4095) - return false; - } + if (!alignment_valid(brw, dst_offset, dst_tiling)) + return false; + if (!alignment_valid(brw, src_offset, src_tiling)) + return false; + if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) return false; From jekstrand at kemper.freedesktop.org Fri Apr 24 02:44:43 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Thu, 23 Apr 2015 19:44:43 -0700 (PDT) Subject: Mesa (master): i965: Add an INTEL_DEBUG=spill option to test spilling Message-ID: <20150424024443.02C2876104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d5a15a89f081bdd23237b8ba5391a2ebea60468b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5a15a89f081bdd23237b8ba5391a2ebea60468b Author: Jason Ekstrand Date: Thu Apr 23 11:49:27 2015 -0700 i965: Add an INTEL_DEBUG=spill option to test spilling Reviewed-by: Jordan Justen Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 +- src/mesa/drivers/dri/i965/intel_debug.c | 1 + src/mesa/drivers/dri/i965/intel_debug.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index dc433b0..94e1a0a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -627,7 +627,7 @@ fs_visitor::assign_regs(bool allow_spilling) } /* Debug of register spilling: Go spill everything. 
*/ - if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_SPILL)) { int reg = choose_spill_reg(g); if (reg != -1) { diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index a5b883c..19be464 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -69,6 +69,7 @@ static const struct dri_debug_control debug_control[] = { { "ann", DEBUG_ANNOTATION }, { "no8", DEBUG_NO8 }, { "vec4vs", DEBUG_VEC4VS }, + { "spill", DEBUG_SPILL }, { NULL, 0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index a741a16..e0e9cb7 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -64,6 +64,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_ANNOTATION (1ull << 28) #define DEBUG_NO8 (1ull << 29) #define DEBUG_VEC4VS (1ull << 30) +#define DEBUG_SPILL (1ull << 31) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" From jekstrand at kemper.freedesktop.org Fri Apr 24 02:44:42 2015 From: jekstrand at kemper.freedesktop.org (Jason Ekstrand) Date: Thu, 23 Apr 2015 19:44:42 -0700 (PDT) Subject: Mesa (master): i965/debug: Use the ull specifier for DEBUG enum defines Message-ID: <20150424024442.EF0D476103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bf55096207898437a5f48ba28f98fe1b992f60e3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bf55096207898437a5f48ba28f98fe1b992f60e3 Author: Jason Ekstrand Date: Thu Apr 23 13:59:32 2015 -0700 i965/debug: Use the ull specifier for DEBUG enum defines The INTEL_DEBUG variable is a uint64_t and if we want an enum value higher than 32 bits, you need to use ull. We might as well use it for all of them.
Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/intel_debug.h | 62 +++++++++++++++---------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 807ad98..a741a16 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -33,37 +33,37 @@ extern uint64_t INTEL_DEBUG; -#define DEBUG_TEXTURE (1 << 0) -#define DEBUG_STATE (1 << 1) -#define DEBUG_BLIT (1 << 2) -#define DEBUG_MIPTREE (1 << 3) -#define DEBUG_PERF (1 << 4) -#define DEBUG_PERFMON (1 << 5) -#define DEBUG_BATCH (1 << 6) -#define DEBUG_PIXEL (1 << 7) -#define DEBUG_BUFMGR (1 << 8) -#define DEBUG_FBO (1 << 9) -#define DEBUG_GS (1 << 10) -#define DEBUG_SYNC (1 << 11) -#define DEBUG_PRIMS (1 << 12) -#define DEBUG_VERTS (1 << 13) -#define DEBUG_DRI (1 << 14) -#define DEBUG_SF (1 << 15) -#define DEBUG_STATS (1 << 16) -#define DEBUG_WM (1 << 17) -#define DEBUG_URB (1 << 18) -#define DEBUG_VS (1 << 19) -#define DEBUG_CLIP (1 << 20) -#define DEBUG_AUB (1 << 21) -#define DEBUG_SHADER_TIME (1 << 22) -#define DEBUG_BLORP (1 << 23) -#define DEBUG_NO16 (1 << 24) -#define DEBUG_VUE (1 << 25) -#define DEBUG_NO_DUAL_OBJECT_GS (1 << 26) -#define DEBUG_OPTIMIZER (1 << 27) -#define DEBUG_ANNOTATION (1 << 28) -#define DEBUG_NO8 (1 << 29) -#define DEBUG_VEC4VS (1 << 30) +#define DEBUG_TEXTURE (1ull << 0) +#define DEBUG_STATE (1ull << 1) +#define DEBUG_BLIT (1ull << 2) +#define DEBUG_MIPTREE (1ull << 3) +#define DEBUG_PERF (1ull << 4) +#define DEBUG_PERFMON (1ull << 5) +#define DEBUG_BATCH (1ull << 6) +#define DEBUG_PIXEL (1ull << 7) +#define DEBUG_BUFMGR (1ull << 8) +#define DEBUG_FBO (1ull << 9) +#define DEBUG_GS (1ull << 10) +#define DEBUG_SYNC (1ull << 11) +#define DEBUG_PRIMS (1ull << 12) +#define DEBUG_VERTS (1ull << 13) +#define DEBUG_DRI (1ull << 14) +#define DEBUG_SF (1ull << 15) +#define DEBUG_STATS (1ull << 16) +#define DEBUG_WM (1ull << 17) +#define DEBUG_URB (1ull << 
18) +#define DEBUG_VS (1ull << 19) +#define DEBUG_CLIP (1ull << 20) +#define DEBUG_AUB (1ull << 21) +#define DEBUG_SHADER_TIME (1ull << 22) +#define DEBUG_BLORP (1ull << 23) +#define DEBUG_NO16 (1ull << 24) +#define DEBUG_VUE (1ull << 25) +#define DEBUG_NO_DUAL_OBJECT_GS (1ull << 26) +#define DEBUG_OPTIMIZER (1ull << 27) +#define DEBUG_ANNOTATION (1ull << 28) +#define DEBUG_NO8 (1ull << 29) +#define DEBUG_VEC4VS (1ull << 30) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" From tpalli at kemper.freedesktop.org Fri Apr 24 10:46:21 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Fri, 24 Apr 2015 03:46:21 -0700 (PDT) Subject: Mesa (master): mesa: fix glGetActiveUniformsiv regression Message-ID: <20150424104621.B94B576101@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 18f44d303014c3c16084c1b15d1999833e0d55db URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=18f44d303014c3c16084c1b15d1999833e0d55db Author: Tapani P?lli Date: Thu Apr 23 14:19:33 2015 +0300 mesa: fix glGetActiveUniformsiv regression Commit 7519ddb caused regression to glGetActiveUniformsiv. Patch adds back validation loop of all given uniforms before writing any values, not touching params in case of errors is tested by the conformance suite. Signed-off-by: Tapani P?lli Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90149 Reviewed-by: Martin Peres --- src/mesa/main/uniform_query.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 4e77b32..3e857ed 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -129,14 +129,26 @@ _mesa_GetActiveUniformsiv(GLuint program, res_prop = resource_prop_from_uniform_prop(pname); + /* We need to first verify that each entry exists as active uniform. If + * not, generate error and do not cause any other side effects. 
+ * + * In the case of an error condition, Page 16 (section 2.3.1 Errors) + * of the OpenGL 4.5 spec says: + * + * "If the generating command modifies values through a pointer argu- + * ment, no change is made to these values." + */ for (int i = 0; i < uniformCount; i++) { - res = _mesa_program_resource_find_index(shProg, GL_UNIFORM, - uniformIndices[i]); - if (!res) { + if (!_mesa_program_resource_find_index(shProg, GL_UNIFORM, + uniformIndices[i])) { _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniformsiv(index)"); - break; + return; } + } + for (int i = 0; i < uniformCount; i++) { + res = _mesa_program_resource_find_index(shProg, GL_UNIFORM, + uniformIndices[i]); if (!_mesa_program_resource_prop(shProg, res, uniformIndices[i], res_prop, &params[i], "glGetActiveUniformsiv")) From tpalli at kemper.freedesktop.org Fri Apr 24 10:46:21 2015 From: tpalli at kemper.freedesktop.org (Tapani Pälli) Date: Fri, 24 Apr 2015 03:46:21 -0700 (PDT) Subject: Mesa (master): mesa: refactor active attrib queries for glGetProgramiv Message-ID: <20150424104621.B18DE76103@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a563689a408b7a28c710fb0e382272a0d823f38a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a563689a408b7a28c710fb0e382272a0d823f38a Author: Tapani Pälli Date: Thu Apr 23 11:13:17 2015 +0300 mesa: refactor active attrib queries for glGetProgramiv Main motivation here is to get rid of iterating IR and encapsulate queries within program resources. No functional changes.
Piglit tests calling the modified functionality: - gl-get-active-attrib-returns-all-inputs - glsl-1.50-get-active-attrib-array - getactiveattrib Signed-off-by: Tapani P?lli Reviewed-by: Martin Peres --- src/mesa/main/shader_query.cpp | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index bc6fec5..a84ec84 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -291,7 +291,6 @@ _mesa_GetAttribLocation(GLhandleARB program, const GLcharARB * name) return (loc >= 0) ? loc : -1; } - unsigned _mesa_count_active_attribs(struct gl_shader_program *shProg) { @@ -300,19 +299,13 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg) return 0; } - exec_list *const ir = shProg->_LinkedShaders[MESA_SHADER_VERTEX]->ir; - unsigned i = 0; - - foreach_in_list(ir_instruction, node, ir) { - const ir_variable *const var = node->as_variable(); - - if (!is_active_attrib(var)) - continue; - - i++; + struct gl_program_resource *res = shProg->ProgramResourceList; + unsigned count = 0; + for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) { + if (is_active_attrib(RESOURCE_VAR(res))) + count++; } - - return i; + return count; } @@ -324,20 +317,16 @@ _mesa_longest_attribute_name_length(struct gl_shader_program *shProg) return 0; } - exec_list *const ir = shProg->_LinkedShaders[MESA_SHADER_VERTEX]->ir; + struct gl_program_resource *res = shProg->ProgramResourceList; size_t longest = 0; + for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) { + if (res->Type == GL_PROGRAM_INPUT && + res->StageReferences & (1 << MESA_SHADER_VERTEX)) { - foreach_in_list(ir_instruction, node, ir) { - const ir_variable *const var = node->as_variable(); - - if (var == NULL - || var->data.mode != ir_var_shader_in - || var->data.location == -1) - continue; - - const size_t len = strlen(var->name); - if (len >= longest) - longest = 
len + 1; + const size_t length = strlen(RESOURCE_VAR(res)->name); + if (length >= longest) + longest = length + 1; + } } return longest; From mattst88 at kemper.freedesktop.org Fri Apr 24 18:40:04 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 24 Apr 2015 11:40:04 -0700 (PDT) Subject: Mesa (master): i965/fs: Allow 2-src math instructions to have immediate src1. Message-ID: <20150424184004.3E8B0761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0087cf23e8e399778e93369d67dd543e767ab526 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0087cf23e8e399778e93369d67dd543e767ab526 Author: Matt Turner Date: Mon Mar 16 17:53:34 2015 -0700 i965/fs: Allow 2-src math instructions to have immediate src1. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp | 12 ++++++++---- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index a51b726..a5bacf4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -62,9 +62,13 @@ could_coissue(const struct brw_device_info *devinfo, const fs_inst *inst) * Returns true for instructions that don't support immediate sources. 
*/ static bool -must_promote_imm(const fs_inst *inst) +must_promote_imm(const struct brw_device_info *devinfo, const fs_inst *inst) { switch (inst->opcode) { + case SHADER_OPCODE_POW: + case SHADER_OPCODE_INT_QUOTIENT: + case SHADER_OPCODE_INT_REMAINDER: + return devinfo->gen < 8; case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: return true; @@ -207,7 +211,7 @@ fs_visitor::opt_combine_constants() foreach_block_and_inst(block, fs_inst, inst, cfg) { ip++; - if (!could_coissue(devinfo, inst) && !must_promote_imm(inst)) + if (!could_coissue(devinfo, inst) && !must_promote_imm(devinfo, inst)) continue; for (int i = 0; i < inst->sources; i++) { @@ -225,7 +229,7 @@ fs_visitor::opt_combine_constants() imm->block = intersection; imm->uses->push_tail(link(const_ctx, &inst->src[i])); imm->uses_by_coissue += could_coissue(devinfo, inst); - imm->must_promote = imm->must_promote || must_promote_imm(inst); + imm->must_promote = imm->must_promote || must_promote_imm(devinfo, inst); imm->last_use_ip = ip; } else { imm = new_imm(&table, const_ctx); @@ -235,7 +239,7 @@ fs_visitor::opt_combine_constants() imm->uses->push_tail(link(const_ctx, &inst->src[i])); imm->val = val; imm->uses_by_coissue = could_coissue(devinfo, inst); - imm->must_promote = must_promote_imm(inst); + imm->must_promote = must_promote_imm(devinfo, inst); imm->first_use_ip = ip; imm->last_use_ip = ip; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 6b6565f..af54deb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -502,9 +502,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: - if (devinfo->gen < 8) - break; - /* fallthrough */ + /* Allow constant propagation into src1 regardless of generation, and + * let constant combining promote the constant on 
Gen < 8. + */ case BRW_OPCODE_BFI1: case BRW_OPCODE_ASR: case BRW_OPCODE_SHL: From mattst88 at kemper.freedesktop.org Fri Apr 24 18:40:04 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 24 Apr 2015 11:40:04 -0700 (PDT) Subject: Mesa (master): glsl: Transform pow(x, 4) into (x*x)*(x*x). Message-ID: <20150424184004.2C83F76104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9b577d57029bb643f2b48b80648b4f901818e93b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b577d57029bb643f2b48b80648b4f901818e93b Author: Matt Turner Date: Mon Mar 16 21:33:31 2015 -0700 glsl: Transform pow(x, 4) into (x*x)*(x*x). Reviewed-by: Juha-Pekka Heikkila Reviewed-by: Kenneth Graunke --- src/glsl/opt_algebraic.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 3d2f2ca..fa5db70 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -99,6 +99,12 @@ is_vec_two(ir_constant *ir) } static inline bool +is_vec_four(ir_constant *ir) +{ + return (ir == NULL) ? false : ir->is_value(4.0, 4); +} + +static inline bool is_vec_negative_one(ir_constant *ir) { return (ir == NULL) ? 
false : ir->is_negative_one(); @@ -774,6 +780,20 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) return mul(x, x); } + if (is_vec_four(op_const[1])) { + ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", + ir_var_temporary); + base_ir->insert_before(x); + base_ir->insert_before(assign(x, ir->operands[0])); + + ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, + "squared", + ir_var_temporary); + base_ir->insert_before(squared); + base_ir->insert_before(assign(squared, mul(x, x))); + return mul(squared, squared); + } + break; case ir_binop_min: From mattst88 at kemper.freedesktop.org Fri Apr 24 18:40:04 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 24 Apr 2015 11:40:04 -0700 (PDT) Subject: Mesa (master): nir: Transform pow(x, 4) into (x*x)*(x*x). Message-ID: <20150424184004.3592A76105@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f251ea393bf3d01d242e2eb56cd0f2b0e140f7b2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f251ea393bf3d01d242e2eb56cd0f2b0e140f7b2 Author: Matt Turner Date: Fri Apr 24 11:37:30 2015 -0700 nir: Transform pow(x, 4) into (x*x)*(x*x). 
--- src/glsl/nir/nir_opt_algebraic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index cdb1924..2a2b956 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -144,6 +144,7 @@ optimizations = [ (('fexp', ('fmul', ('flog', a), b)), ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b (('fpow', a, 1.0), a), (('fpow', a, 2.0), ('fmul', a, a)), + (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), (('fpow', 2.0, a), ('fexp2', a)), (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))), From brianp at kemper.freedesktop.org Fri Apr 24 20:47:57 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Fri, 24 Apr 2015 13:47:57 -0700 (PDT) Subject: Mesa (master): mesa: put more info in glTexImage GL_OUT_OF_MEMORY error message Message-ID: <20150424204757.7DBBA76104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 13b2e6a520d1f8979fc4da1dd2c6811585b16203 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=13b2e6a520d1f8979fc4da1dd2c6811585b16203 Author: Brian Paul Date: Fri Apr 24 12:56:04 2015 -0600 mesa: put more info in glTexImage GL_OUT_OF_MEMORY error message Give the user some idea about the size of the texture which caused the GL_OUT_OF_MEMORY error. 
Reviewed-by: Matt Turner --- src/mesa/main/teximage.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index d07263c..7bc1da7 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -3320,7 +3320,9 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims, if (!sizeOK) { _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glTexImage%uD(image too large)", dims); + "glTexImage%uD(image too large: %d x %d x %d, %s format)", + dims, width, height, depth, + _mesa_lookup_enum_by_nr(internalFormat)); return; } From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:26 -0700 (PDT) Subject: Mesa (10.5): Add release notes for the 10.5.4 release Message-ID: <20150424215726.E948076105@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: ea0d1f575c214c09ba3df12644a960e86e031766 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea0d1f575c214c09ba3df12644a960e86e031766 Author: Emil Velikov Date: Fri Apr 24 22:27:09 2015 +0100 Add release notes for the 10.5.4 release Signed-off-by: Emil Velikov --- docs/relnotes/10.5.4.html | 124 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/docs/relnotes/10.5.4.html b/docs/relnotes/10.5.4.html new file mode 100644 index 0000000..78767cc --- /dev/null +++ b/docs/relnotes/10.5.4.html @@ -0,0 +1,124 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.4 Release Notes / April 24, 2015

    + +

    +Mesa 10.5.4 is a bug fix release which fixes bugs found since the 10.5.3 release. +

    +

    +Mesa 10.5.4 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 69226 - Cannot enable basic shaders with Second Life aborts attempt
    • + +
    • Bug 71591 - Second Life shaders fail to compile (extension declared in middle of shader)
    • + +
    • Bug 81025 - [IVB/BYT Bisected]Piglit spec_ARB_draw_indirect_arb_draw_indirect-draw-elements-prim-restart-ugly fails
    • + +
    • Bug 89457 - [BSW Bisected]ogles3conform ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails
    • + +
    • Bug 89957 - vm protection faults in piglit test: texsubimage cube_map_array pbo
    • + +
    + + +

    Changes

    + +

    Brian Paul (1):

    +
      +
    • glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow
    • +
    + +

    Dave Airlie (2):

    +
      +
    • st/mesa: convert sub image for cube map arrays to 2d arrays for upload
    • +
    • st/mesa: align cube map arrays layers
    • +
    + +

    Emil Velikov (11):

    +
      +
    • docs: Add 256 sums for the 10.5.3 release
    • +
    • radeonsi: remove unused si_dump_key()
    • +
    • android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS
    • +
    • android: add $(mesa_top)/src include to the whole of mesa
    • +
    • android: egl: add libsync_cflags to the build
    • +
    • android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H
    • +
    • android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines
    • +
    • android: add $(mesa_top)/src/mesa/main to the includes list
    • +
    • android: dri: link against libmesa_util
    • +
    • android: mesa: fix the path of the SSE4_1 optimisations
    • +
    • Update version to 10.5.4
    • +
    + +

    Ian Romanick (1):

    +
      +
    • nir: Fix typo in "ushr by 0" algebraic replacement
    • +
    + +

    Kenneth Graunke (2):

    +
      +
    • i965: Fix software primitive restart with indirect draws.
    • +
    • drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader).
    • +
    + +

    Kristian Høgsberg (1):

    +
      +
    • i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders
    • +
    + +

    Marek Olšák (2):

    +
      +
    • glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms
    • +
    • glsl_to_tgsi: don't use a potentially-undefined immediate for ir_query_levels
    • +
    + +

    Mathias Froehlich (1):

    +
      +
    • i965: Flush batchbuffer containing the query on glQueryCounter.
    • +
    + +

    Mauro Rossi (2):

    +
      +
    • android: mesa: generate the format_{un,}pack.[ch] sources
    • +
    • android: add inital NIR build
    • +
    + + +
    + + From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:26 -0700 (PDT) Subject: Mesa (10.5): docs: Add sha256 sums for the 10.5.4 release Message-ID: <20150424215726.F00AB76104@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: adb47b5b279b6fd920151aa7926af6ffd2069339 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=adb47b5b279b6fd920151aa7926af6ffd2069339 Author: Emil Velikov Date: Fri Apr 24 22:51:25 2015 +0100 docs: Add sha256 sums for the 10.5.4 release Signed-off-by: Emil Velikov --- docs/relnotes/10.5.4.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.4.html b/docs/relnotes/10.5.4.html index 78767cc..4c466d0 100644 --- a/docs/relnotes/10.5.4.html +++ b/docs/relnotes/10.5.4.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +e1089567fc7bf8d9b2d8badcc9f2fc3b758701c8c0ccfe7af1805549fea53f11  mesa-10.5.4.tar.gz
    +b51e723f3a20d842c88a92d809435b229fc4744ca0dbec0317d9d4a3ac4c6803  mesa-10.5.4.tar.xz
     
    From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:27 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:27 -0700 (PDT) Subject: Mesa (master): Add release notes for the 10.5.4 release Message-ID: <20150424215727.0CA8976104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5b39cb47366d1cfce206d4644983a4e5b2b76709 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b39cb47366d1cfce206d4644983a4e5b2b76709 Author: Emil Velikov Date: Fri Apr 24 22:27:09 2015 +0100 Add release notes for the 10.5.4 release Signed-off-by: Emil Velikov (cherry picked from commit ea0d1f575c214c09ba3df12644a960e86e031766) --- docs/relnotes/10.5.4.html | 124 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/docs/relnotes/10.5.4.html b/docs/relnotes/10.5.4.html new file mode 100644 index 0000000..78767cc --- /dev/null +++ b/docs/relnotes/10.5.4.html @@ -0,0 +1,124 @@ + + + + + Mesa Release Notes + + + + +
    +

    The Mesa 3D Graphics Library

    +
    + + +
    + +

    Mesa 10.5.4 Release Notes / April 24, 2015

    + +

    +Mesa 10.5.4 is a bug fix release which fixes bugs found since the 10.5.3 release. +

    +

    +Mesa 10.5.4 implements the OpenGL 3.3 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 3.3. OpenGL +3.3 is only available if requested at context creation +because compatibility contexts are not supported. +

    + + +

    SHA256 checksums

    +
    +TBD
    +
    + + +

    New features

    +

    None

    + +

    Bug fixes

    + +

    This list is likely incomplete.

    + +
      + +
    • Bug 69226 - Cannot enable basic shaders with Second Life aborts attempt
    • + +
    • Bug 71591 - Second Life shaders fail to compile (extension declared in middle of shader)
    • + +
    • Bug 81025 - [IVB/BYT Bisected]Piglit spec_ARB_draw_indirect_arb_draw_indirect-draw-elements-prim-restart-ugly fails
    • + +
    • Bug 89457 - [BSW Bisected]ogles3conform ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails
    • + +
    • Bug 89957 - vm protection faults in piglit lest: texsubimage cube_map_array pbo
    • + +
    + + +

    Changes

    + +

    Brian Paul (1):

    +
      +
    • glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow
    • +
    + +

    Dave Airlie (2):

    +
      +
    • st/mesa: convert sub image for cube map arrays to 2d arrays for upload
    • +
    • st/mesa: align cube map arrays layers
    • +
    + +

    Emil Velikov (11):

    +
      +
    • docs: Add 256 sums for the 10.5.3 release
    • +
    • radeonsi: remove unused si_dump_key()
    • +
    • android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS
    • +
    • android: add $(mesa_top)/src include to the whole of mesa
    • +
    • android: egl: add libsync_cflags to the build
    • +
    • android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H
    • +
    • android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines
    • +
    • android: add $(mesa_top)/src/mesa/main to the includes list
    • +
    • android: dri: link against libmesa_util
    • +
    • android: mesa: fix the path of the SSE4_1 optimisations
    • +
    • Update version to 10.5.4
    • +
    + +

    Ian Romanick (1):

    +
      +
    • nir: Fix typo in "ushr by 0" algebraic replacement
    • +
    + +

    Kenneth Graunke (2):

    +
      +
    • i965: Fix software primitive restart with indirect draws.
    • +
    • drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader).
    • +
    + +

    Kristian Høgsberg (1):

    +
      +
    • i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders
    • +
    + +

    Marek Olšák (2):

    +
      +
    • glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms
    • +
    • glsl_to_tgsi: don't use a potentially-undefined immediate for ir_query_levels
    • +
    + +

    Mathias Froehlich (1):

    +
      +
    • i965: Flush batchbuffer containing the query on glQueryCounter.
    • +
    + +

    Mauro Rossi (2):

    +
      +
    • android: mesa: generate the format_{un,}pack.[ch] sources
    • +
    • android: add inital NIR build
    • +
    + + +
    + + From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:27 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:27 -0700 (PDT) Subject: Mesa (master): docs: Add sha256 sums for the 10.5.4 release Message-ID: <20150424215727.1355976104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 196cf8db6571b8cf7e44a4cf9d7e827e130a568d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=196cf8db6571b8cf7e44a4cf9d7e827e130a568d Author: Emil Velikov Date: Fri Apr 24 22:51:25 2015 +0100 docs: Add sha256 sums for the 10.5.4 release Signed-off-by: Emil Velikov (cherry picked from commit adb47b5b279b6fd920151aa7926af6ffd2069339) --- docs/relnotes/10.5.4.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/10.5.4.html b/docs/relnotes/10.5.4.html index 78767cc..4c466d0 100644 --- a/docs/relnotes/10.5.4.html +++ b/docs/relnotes/10.5.4.html @@ -31,7 +31,8 @@ because compatibility contexts are not supported.

    SHA256 checksums

    -TBD
    +e1089567fc7bf8d9b2d8badcc9f2fc3b758701c8c0ccfe7af1805549fea53f11  mesa-10.5.4.tar.gz
    +b51e723f3a20d842c88a92d809435b229fc4744ca0dbec0317d9d4a3ac4c6803  mesa-10.5.4.tar.xz
     
    From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:27 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:27 -0700 (PDT) Subject: Mesa (master): docs: add news item and link release notes for mesa 10.5.4 Message-ID: <20150424215727.19FFA76104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e17018589651d449bbef5f98a6c2f65cc3ca70e5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e17018589651d449bbef5f98a6c2f65cc3ca70e5 Author: Emil Velikov Date: Fri Apr 24 22:58:23 2015 +0100 docs: add news item and link release notes for mesa 10.5.4 Signed-off-by: Emil Velikov --- docs/index.html | 6 ++++++ docs/relnotes.html | 1 + 2 files changed, 7 insertions(+) diff --git a/docs/index.html b/docs/index.html index 9e4644c..c642604 100644 --- a/docs/index.html +++ b/docs/index.html @@ -16,6 +16,12 @@

    News

    +

    April 24, 2015

    +

    +Mesa 10.5.4 is released. +This is a bug-fix release. +

    +

    April 12, 2015

    Mesa 10.5.3 is released. diff --git a/docs/relnotes.html b/docs/relnotes.html index 6ec35d1..7f2e1d8 100644 --- a/docs/relnotes.html +++ b/docs/relnotes.html @@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.

      +
    • 10.5.4 release notes
    • 10.5.3 release notes
    • 10.5.2 release notes
    • 10.4.7 release notes From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:27 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:27 -0700 (PDT) Subject: Mesa: tag mesa-10.5.4: Mesa 10.5.4 release Message-ID: <20150424215727.285AF76104@kemper.freedesktop.org> Module: Mesa Branch: refs/tags/mesa-10.5.4 Tag: 8beb2f4293cf0aac94c936b6ef39a4e2c6accf86 URL: http://cgit.freedesktop.org/mesa/mesa/tag/?id=8beb2f4293cf0aac94c936b6ef39a4e2c6accf86 Tagger: Emil Velikov Date: Fri Apr 24 22:27:30 2015 +0100 Mesa 10.5.4 release From evelikov at kemper.freedesktop.org Fri Apr 24 21:57:26 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Fri, 24 Apr 2015 14:57:26 -0700 (PDT) Subject: Mesa (10.5): Update version to 10.5.4 Message-ID: <20150424215726.E35AD76104@kemper.freedesktop.org> Module: Mesa Branch: 10.5 Commit: 776e47a305d33ec04a05ac2d81691b884baa9e62 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=776e47a305d33ec04a05ac2d81691b884baa9e62 Author: Emil Velikov Date: Fri Apr 24 22:22:47 2015 +0100 Update version to 10.5.4 Signed-off-by: Emil Velikov --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1e9c35f..927fa80 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.5.3 +10.5.4 From mattst88 at kemper.freedesktop.org Fri Apr 24 23:25:30 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 24 Apr 2015 16:25:30 -0700 (PDT) Subject: Mesa (master): i965/fs: Add missing pixel_x/y to brw_instruction_name(). Message-ID: <20150424232530.696F576105@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 11d2305d7fe2f8e8cd6a4f2ac5afe79718a4fa10 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11d2305d7fe2f8e8cd6a4f2ac5afe79718a4fa10 Author: Matt Turner Date: Fri Apr 24 16:23:46 2015 -0700 i965/fs: Add missing pixel_x/y to brw_instruction_name(). Forgotten in commit 529064f6. 
--- src/mesa/drivers/dri/i965/brw_shader.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 79f0e1c..0e94424 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -524,6 +524,11 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_LINTERP: return "linterp"; + case FS_OPCODE_PIXEL_X: + return "pixel_x"; + case FS_OPCODE_PIXEL_Y: + return "pixel_y"; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return "uniform_pull_const"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: From mattst88 at kemper.freedesktop.org Fri Apr 24 23:25:30 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Fri, 24 Apr 2015 16:25:30 -0700 (PDT) Subject: Mesa (master): i965/fs: Don' t constant propagate into integer math instructions. Message-ID: <20150424232530.632A976104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 51c61fff8f46472820ac413ad22e9f3edf670396 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=51c61fff8f46472820ac413ad22e9f3edf670396 Author: Matt Turner Date: Fri Apr 24 13:14:56 2015 -0700 i965/fs: Don't constant propagate into integer math instructions. Constant combining won't promote non-floats, so this isn't safe. Fixes regressions since commit 0087cf23e. 
--- src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp | 2 -- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp index a5bacf4..aa62031 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp @@ -66,8 +66,6 @@ must_promote_imm(const struct brw_device_info *devinfo, const fs_inst *inst) { switch (inst->opcode) { case SHADER_OPCODE_POW: - case SHADER_OPCODE_INT_QUOTIENT: - case SHADER_OPCODE_INT_REMAINDER: return devinfo->gen < 8; case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index af54deb..9542d6a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -499,9 +499,13 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) progress = true; break; - case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: + /* FINISHME: Promote non-float constants and remove this. */ + if (devinfo->gen < 8) + break; + /* fallthrough */ + case SHADER_OPCODE_POW: /* Allow constant propagation into src1 regardless of generation, and * let constant combining promote the constant on Gen < 8. 
*/ From imirkin at kemper.freedesktop.org Sat Apr 25 01:56:25 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Fri, 24 Apr 2015 18:56:25 -0700 (PDT) Subject: Mesa (master): mesa: add support for exposing up to GL4.2 Message-ID: <20150425015625.B4DA776104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 67ba388dc06456409762e66ed7ccdffebf956459 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=67ba388dc06456409762e66ed7ccdffebf956459 Author: Ilia Mirkin Date: Thu Apr 23 10:48:47 2015 -0400 mesa: add support for exposing up to GL4.2 Add the 4.0/4.1/4.2 extensions lists to compute_version. A couple of extensions aren't in mesa yet, so those are marked with 0 until they become supported. Signed-off-by: Ilia Mirkin Reviewed-by: Matt Turner --- src/mesa/main/version.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 7c6d994..a65ace0 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -295,7 +295,51 @@ compute_version(const struct gl_extensions *extensions, extensions->EXT_texture_swizzle); /* ARB_sampler_objects is always enabled in mesa */ - if (ver_3_3) { + const GLboolean ver_4_0 = (ver_3_3 && + consts->GLSLVersion >= 400 && + extensions->ARB_draw_buffers_blend && + extensions->ARB_draw_indirect && + extensions->ARB_gpu_shader5 && + extensions->ARB_gpu_shader_fp64 && + extensions->ARB_sample_shading && + 0/*extensions->ARB_shader_subroutine*/ && + extensions->ARB_tessellation_shader && + extensions->ARB_texture_buffer_object_rgb32 && + extensions->ARB_texture_cube_map_array && + extensions->ARB_texture_query_lod && + extensions->ARB_transform_feedback2 && + extensions->ARB_transform_feedback3); + const GLboolean ver_4_1 = (ver_4_0 && + consts->GLSLVersion >= 410 && + extensions->ARB_ES2_compatibility && + extensions->ARB_shader_precision && + 0/*extensions->ARB_vertex_attrib_64bit*/ && + 
extensions->ARB_viewport_array); + const GLboolean ver_4_2 = (ver_4_1 && + consts->GLSLVersion >= 420 && + extensions->ARB_base_instance && + extensions->ARB_conservative_depth && + extensions->ARB_internalformat_query && + extensions->ARB_shader_atomic_counters && + extensions->ARB_shader_image_load_store && + extensions->ARB_shading_language_420pack && + extensions->ARB_shading_language_packing && + extensions->ARB_texture_compression_bptc && + extensions->ARB_transform_feedback_instanced); + + if (ver_4_2) { + major = 4; + minor = 2; + } + else if (ver_4_1) { + major = 4; + minor = 1; + } + else if (ver_4_0) { + major = 4; + minor = 0; + } + else if (ver_3_3) { major = 3; minor = 3; } From mattst88 at kemper.freedesktop.org Sat Apr 25 09:16:19 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Sat, 25 Apr 2015 02:16:19 -0700 (PDT) Subject: Mesa (master): i965/fs: Disallow constant propagation into POW on Gen 6. Message-ID: <20150425091619.372C776104@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7f5a8ac155283e78df2da5b172a65361a80d38b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7f5a8ac155283e78df2da5b172a65361a80d38b6 Author: Matt Turner Date: Sat Apr 25 01:50:04 2015 -0700 i965/fs: Disallow constant propagation into POW on Gen 6. Fixes assertion failures in three piglit tests on Gen 6 since commit 0087cf23e. 
--- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 9542d6a..c9ce2bd 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -506,9 +506,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) break; /* fallthrough */ case SHADER_OPCODE_POW: - /* Allow constant propagation into src1 regardless of generation, and - * let constant combining promote the constant on Gen < 8. + /* Allow constant propagation into src1 (except on Gen 6), and let + * constant combining promote the constant on Gen < 8. + * + * While Gen 6 MATH can take a scalar source, its source and + * destination offsets must be equal and we cannot ensure that. */ + if (devinfo->gen == 6) + break; + /* fallthrough */ case BRW_OPCODE_BFI1: case BRW_OPCODE_ASR: case BRW_OPCODE_SHL: From kwg at kemper.freedesktop.org Sat Apr 25 17:47:04 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 25 Apr 2015 10:47:04 -0700 (PDT) Subject: Mesa (master): i965: Disassemble sampler message names on Gen5+. Message-ID: <20150425174704.7D72676105@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 19165e3b6eff3a33379af127d27c6585ffbd1028 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=19165e3b6eff3a33379af127d27c6585ffbd1028 Author: Kenneth Graunke Date: Thu Apr 23 22:56:25 2015 -0700 i965: Disassemble sampler message names on Gen5+. Previously, sampler messages were decoded as sampler (1, 0, 2, 2) mlen 6 rlen 8 { align1 1H }; I don't know how much time we've collectively wasted trying to read this format. I can never recall which number is the surface index, sampler index, message type, or...whatever that other number is. Figuring out the message name from the numerical code is also painful. 
Now they decode as: sampler sample_l SIMD16 Surface = 1 Sampler = 0 mlen 6 rlen 8 { align1 1H }; This is easy to read at a glance, and matches the format I used for render target formats. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Chris Forbes --- src/mesa/drivers/dri/i965/brw_disasm.c | 38 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index d1078c0..95e262a 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -579,6 +579,34 @@ static const char *const urb_complete[2] = { [1] = "complete" }; +static const char *const gen5_sampler_msg_type[] = { + [GEN5_SAMPLER_MESSAGE_SAMPLE] = "sample", + [GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS] = "sample_b", + [GEN5_SAMPLER_MESSAGE_SAMPLE_LOD] = "sample_l", + [GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE] = "sample_c", + [GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS] = "sample_d", + [GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE] = "sample_b_c", + [GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE] = "sample_l_c", + [GEN5_SAMPLER_MESSAGE_SAMPLE_LD] = "ld", + [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4] = "gather4", + [GEN5_SAMPLER_MESSAGE_LOD] = "lod", + [GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO] = "resinfo", + [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C] = "gather4_c", + [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO] = "gather4_po", + [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c", + [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c", + [GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs", + [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms", + [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss", +}; + +static const char *const gen5_sampler_simd_mode[4] = { + [BRW_SAMPLER_SIMD_MODE_SIMD4X2] = "SIMD4x2", + [BRW_SAMPLER_SIMD_MODE_SIMD8] = "SIMD8", + [BRW_SAMPLER_SIMD_MODE_SIMD16] = "SIMD16", + [BRW_SAMPLER_SIMD_MODE_SIMD32_64] = "SIMD32/64", +}; + static const char *const 
sampler_target_format[4] = { [0] = "F", [2] = "UD", @@ -1374,11 +1402,13 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, break; case BRW_SFID_SAMPLER: if (devinfo->gen >= 5) { - format(file, " (%ld, %ld, %ld, %ld)", + err |= control(file, "sampler message", gen5_sampler_msg_type, + brw_inst_sampler_msg_type(devinfo, inst), &space); + err |= control(file, "sampler simd mode", gen5_sampler_simd_mode, + brw_inst_sampler_simd_mode(devinfo, inst), &space); + format(file, " Surface = %ld Sampler = %ld", brw_inst_binding_table_index(devinfo, inst), - brw_inst_sampler(devinfo, inst), - brw_inst_sampler_msg_type(devinfo, inst), - brw_inst_sampler_simd_mode(devinfo, inst)); + brw_inst_sampler(devinfo, inst)); } else { format(file, " (%ld, %ld, %ld, ", brw_inst_binding_table_index(devinfo, inst), From kwg at kemper.freedesktop.org Sat Apr 25 17:47:04 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Sat, 25 Apr 2015 10:47:04 -0700 (PDT) Subject: Mesa (master): i965: Fill out the rest of brw_debug_recompile_sampler_key( ). Message-ID: <20150425174704.859C8761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 30c8d8a831edcdbac0bbaccab18cf3b53dbd08c1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=30c8d8a831edcdbac0bbaccab18cf3b53dbd08c1 Author: Kenneth Graunke Date: Thu Apr 23 23:17:10 2015 -0700 i965: Fill out the rest of brw_debug_recompile_sampler_key(). This makes INTEL_DEBUG=perf report shader recompiles due to CMS vs. UMS/IMS differences and Sandybridge textureGather workarounds. Previously, we just flagged them as "Something else". 
Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_wm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 959f346..45a03bb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -266,6 +266,14 @@ brw_debug_recompile_sampler_key(struct brw_context *brw, old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]); found |= key_debug(brw, "gather channel quirk on any texture unit", old_key->gather_channel_quirk_mask, key->gather_channel_quirk_mask); + found |= key_debug(brw, "compressed multisample layout", + old_key->compressed_multisample_layout_mask, + key->compressed_multisample_layout_mask); + + for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { + found |= key_debug(brw, "textureGather workarounds", + old_key->gen6_gather_wa[i], key->gen6_gather_wa[i]); + } return found; } From mareko at kemper.freedesktop.org Sat Apr 25 22:55:11 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Sat, 25 Apr 2015 15:55:11 -0700 (PDT) Subject: Mesa (master): gallium/hud: add more options to customize HUD panes Message-ID: <20150425225511.46A7676105@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6fc0cd2f52ddc4a1e7026c9c46fd6da3968fa439 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6fc0cd2f52ddc4a1e7026c9c46fd6da3968fa439 Author: Gediminas Jakutis Date: Sun Apr 12 02:58:33 2015 +0300 gallium/hud: add more options to customize HUD panes Extends the syntax of GALLIUM_HUD environment variable to: - Add options to set the size and exact location of each pane. - Add an option to limit the maximum allowed value of the X axis on a pane, clamping the graph down to not go above this value. - Add an option to auto-adjust the value of the Y axis down to the highest value still visible on the graph. v2: - Make the patch simpler and smaller. 
- With dynamic auto-adjusting on, adjust the Y axis once per pane update instead of updating once every several seconds. - No longer mishandle pane height when having more than one graph per pane. --- src/gallium/auxiliary/hud/hud_context.c | 163 +++++++++++++++++++++++++++++-- src/gallium/auxiliary/hud/hud_private.h | 4 + 2 files changed, 161 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index e46c68c..00ec205 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -569,9 +569,36 @@ hud_pane_set_max_value(struct hud_pane *pane, uint64_t value) pane->yscale = -(int)pane->inner_height / (float)pane->max_value; } +static void +hud_pane_update_dyn_ceiling(struct hud_graph *gr, struct hud_pane *pane) +{ + unsigned i; + float tmp = 0.0f; + + if (pane->dyn_ceil_last_ran != gr->index) { + LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) { + for (i = 0; i < gr->num_vertices; ++i) { + tmp = gr->vertices[i * 2 + 1] > tmp ? + gr->vertices[i * 2 + 1] : tmp; + } + } + + /* Avoid setting it lower than the initial starting height. */ + tmp = tmp > pane->initial_max_value ? tmp : pane->initial_max_value; + hud_pane_set_max_value(pane, tmp); + } + + /* + * Mark this adjustment run so we could avoid repeating a full update + * again needlessly in case the pane has more than one graph. 
+ */ + pane->dyn_ceil_last_ran = gr->index; +} + static struct hud_pane * hud_pane_create(unsigned x1, unsigned y1, unsigned x2, unsigned y2, - unsigned period, uint64_t max_value) + unsigned period, uint64_t max_value, uint64_t ceiling, + boolean dyn_ceiling) { struct hud_pane *pane = CALLOC_STRUCT(hud_pane); @@ -590,6 +617,10 @@ hud_pane_create(unsigned x1, unsigned y1, unsigned x2, unsigned y2, pane->inner_height = pane->inner_y2 - pane->inner_y1; pane->period = period; pane->max_num_vertices = (x2 - x1 + 2) / 2; + pane->ceiling = ceiling; + pane->dyn_ceiling = dyn_ceiling; + pane->dyn_ceil_last_ran = 0; + pane->initial_max_value = max_value; hud_pane_set_max_value(pane, max_value); LIST_INITHEAD(&pane->graph_list); return pane; @@ -633,6 +664,9 @@ hud_pane_add_graph(struct hud_pane *pane, struct hud_graph *gr) void hud_graph_add_value(struct hud_graph *gr, uint64_t value) { + gr->current_value = value; + value = value > gr->pane->ceiling ? gr->pane->ceiling : value; + if (gr->index == gr->pane->max_num_vertices) { gr->vertices[0] = 0; gr->vertices[1] = gr->vertices[(gr->index-1)*2+1]; @@ -646,7 +680,9 @@ hud_graph_add_value(struct hud_graph *gr, uint64_t value) gr->num_vertices++; } - gr->current_value = value; + if (gr->pane->dyn_ceiling == true) { + hud_pane_update_dyn_ceiling(gr, gr->pane); + } if (value > gr->pane->max_value) { hud_pane_set_max_value(gr->pane, value); } @@ -683,6 +719,69 @@ parse_string(const char *s, char *out) return i; } +static char * +read_pane_settings(char *str, unsigned * const x, unsigned * const y, + unsigned * const width, unsigned * const height, + uint64_t * const ceiling, boolean * const dyn_ceiling) +{ + char *ret = str; + unsigned tmp; + + while (*str == '.') { + ++str; + switch (*str) { + case 'x': + ++str; + *x = strtoul(str, &ret, 10); + str = ret; + break; + + case 'y': + ++str; + *y = strtoul(str, &ret, 10); + str = ret; + break; + + case 'w': + ++str; + tmp = strtoul(str, &ret, 10); + *width = tmp > 80 ? 
tmp : 80; /* 80 is chosen arbitrarily */ + str = ret; + break; + + /* + * Prevent setting height to less than 50. If the height is set to less, + * the text of the Y axis labels on the graph will start overlapping. + */ + case 'h': + ++str; + tmp = strtoul(str, &ret, 10); + *height = tmp > 50 ? tmp : 50; + str = ret; + break; + + case 'c': + ++str; + tmp = strtoul(str, &ret, 10); + *ceiling = tmp > 10 ? tmp : 10; + str = ret; + break; + + case 'd': + ++str; + ret = str; + *dyn_ceiling = true; + break; + + default: + fprintf(stderr, "gallium_hud: syntax error: unexpected '%c'\n", *str); + } + + } + + return ret; +} + static boolean has_occlusion_query(struct pipe_screen *screen) { @@ -705,11 +804,15 @@ static void hud_parse_env_var(struct hud_context *hud, const char *env) { unsigned num, i; - char name[256], s[256]; + char name_a[256], s[256]; + char *name; struct hud_pane *pane = NULL; unsigned x = 10, y = 10; unsigned width = 251, height = 100; unsigned period = 500 * 1000; /* default period (1/2 second) */ + uint64_t ceiling = UINT64_MAX; + unsigned column_width = 251; + boolean dyn_ceiling = false; const char *period_env; /* @@ -725,11 +828,23 @@ hud_parse_env_var(struct hud_context *hud, const char *env) } } - while ((num = parse_string(env, name)) != 0) { + while ((num = parse_string(env, name_a)) != 0) { env += num; + /* check for explicit location, size and etc. settings */ + name = read_pane_settings(name_a, &x, &y, &width, &height, &ceiling, + &dyn_ceiling); + + /* + * Keep track of overall column width to avoid pane overlapping in case + * later we create a new column while the bottom pane in the current + * column is less wide than the rest of the panes in it. + */ + column_width = width > column_width ? 
width : column_width; + if (!pane) { - pane = hud_pane_create(x, y, x + width, y + height, period, 10); + pane = hud_pane_create(x, y, x + width, y + height, period, 10, + ceiling, dyn_ceiling); if (!pane) return; } @@ -807,6 +922,7 @@ hud_parse_env_var(struct hud_context *hud, const char *env) if (num && sscanf(s, "%u", &i) == 1) { hud_pane_set_max_value(pane, i); + pane->initial_max_value = i; } else { fprintf(stderr, "gallium_hud: syntax error: unexpected '%c' (%i) " @@ -826,6 +942,7 @@ hud_parse_env_var(struct hud_context *hud, const char *env) case ',': env++; y += height + hud->font.glyph_height * (pane->num_graphs + 2); + height = 100; if (pane && pane->num_graphs) { LIST_ADDTAIL(&pane->head, &hud->pane_list); @@ -836,17 +953,27 @@ hud_parse_env_var(struct hud_context *hud, const char *env) case ';': env++; y = 10; - x += width + hud->font.glyph_width * 7; + x += column_width + hud->font.glyph_width * 7; + height = 100; if (pane && pane->num_graphs) { LIST_ADDTAIL(&pane->head, &hud->pane_list); pane = NULL; } + + /* Starting a new column; reset column width. */ + column_width = 251; break; default: fprintf(stderr, "gallium_hud: syntax error: unexpected '%c'\n", *env); } + + /* Reset to defaults for the next pane in case these were modified. 
*/ + width = 251; + ceiling = UINT64_MAX; + dyn_ceiling = false; + } if (pane) { @@ -878,6 +1005,30 @@ print_help(struct pipe_screen *screen) puts(""); puts(" Example: GALLIUM_HUD=\"cpu,fps;primitives-generated\""); puts(""); + puts(" Additionally, by prepending '.[identifier][value]' modifiers to"); + puts(" a name, it is possible to explicitly set the location and size"); + puts(" of a pane, along with limiting overall maximum value of the"); + puts(" Y axis and activating dynamic readjustment of the Y axis."); + puts(" Several modifiers may be applied to the same pane simultaneously."); + puts(""); + puts(" 'x[value]' sets the location of the pane on the x axis relative"); + puts(" to the upper-left corner of the viewport, in pixels."); + puts(" 'y[value]' sets the location of the pane on the y axis relative"); + puts(" to the upper-left corner of the viewport, in pixels."); + puts(" 'w[value]' sets width of the graph pixels."); + puts(" 'h[value]' sets height of the graph in pixels."); + puts(" 'c[value]' sets the ceiling of the value of the Y axis."); + puts(" If the graph needs to draw values higher than"); + puts(" the ceiling allows, the value is clamped."); + puts(" 'd' activates dynamic Y axis readjustment to set the value of"); + puts(" the Y axis to match the highest value still visible in the graph."); + puts(""); + puts(" If 'c' and 'd' modifiers are used simultaneously, both are in effect:"); + puts(" the Y axis does not go above the restriction imposed by 'c' while"); + puts(" still adjusting the value of the Y axis down when appropriate."); + puts(""); + puts(" Example: GALLIUM_HUD=\".w256.h64.x1600.y520.d.c1000fps+cpu,.datom-count\""); + puts(""); puts(" Available names:"); puts(" fps"); puts(" cpu"); diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h index 1606ada..230f026 100644 --- a/src/gallium/auxiliary/hud/hud_private.h +++ b/src/gallium/auxiliary/hud/hud_private.h @@ -62,6 +62,10 @@ struct 
hud_pane { float yscale; unsigned max_num_vertices; uint64_t max_value; + uint64_t initial_max_value; + uint64_t ceiling; + unsigned dyn_ceil_last_ran; + boolean dyn_ceiling; boolean uses_byte_units; uint64_t period; /* in microseconds */ From mperes at kemper.freedesktop.org Mon Apr 27 07:15:12 2015 From: mperes at kemper.freedesktop.org (Martin Peres) Date: Mon, 27 Apr 2015 00:15:12 -0700 (PDT) Subject: Mesa (master): docs/GL3: started adding support for shader_image_size Message-ID: <20150427071512.67F2D761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9ea38ee96d483a1bc5b525f5452216f454f450ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ea38ee96d483a1bc5b525f5452216f454f450ab Author: Martin Peres Date: Mon Apr 27 10:13:49 2015 +0300 docs/GL3: started adding support for shader_image_size Signed-off-by: Martin Peres --- docs/GL3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 172fd3c..617ff34 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -163,7 +163,7 @@ GL 4.3, GLSL 4.30: GL_ARB_multi_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_program_interface_query DONE (all drivers) GL_ARB_robust_buffer_access_behavior not started - GL_ARB_shader_image_size not started + GL_ARB_shader_image_size in progress (Martin Peres) GL_ARB_shader_storage_buffer_object not started GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe) @@ -220,7 +220,7 @@ GLES3.1, GLSL ES 3.1 GL_ARB_program_interface_query DONE (all drivers) GL_ARB_shader_atomic_counters DONE (i965) GL_ARB_shader_image_load_store in progress (curro) - GL_ARB_shader_image_size not started + GL_ARB_shader_image_size in progress (Martin Peres) GL_ARB_shader_storage_buffer_object not started GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objects DONE (all drivers) From 
tarceri at kemper.freedesktop.org Mon Apr 27 11:39:53 2015 From: tarceri at kemper.freedesktop.org (Timothy Arceri) Date: Mon, 27 Apr 2015 04:39:53 -0700 (PDT) Subject: Mesa (master): glsl: add arrays of arrays support to without_array function Message-ID: <20150427113953.338D9761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fda5f7bb2f23b4adb2f2684162dfb36fe0cb8641 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fda5f7bb2f23b4adb2f2684162dfb36fe0cb8641 Author: Timothy Arceri Date: Mon Aug 18 21:40:50 2014 -1000 glsl: add arrays of arrays support to without_array function Signed-off-by: Timothy Arceri Reviewed-by: Ilia Mirkin --- src/glsl/glsl_types.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/glsl/glsl_types.h b/src/glsl/glsl_types.h index d383dd5..5645dcd 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@ -540,7 +540,12 @@ struct glsl_type { */ const glsl_type *without_array() const { - return this->is_array() ? this->fields.array : this; + const glsl_type *t = this; + + while (t->is_array()) + t = t->fields.array; + + return t; } /** From tarceri at kemper.freedesktop.org Mon Apr 27 11:39:53 2015 From: tarceri at kemper.freedesktop.org (Timothy Arceri) Date: Mon, 27 Apr 2015 04:39:53 -0700 (PDT) Subject: Mesa (master): glsl: support packing of arrays of arrays Message-ID: <20150427113953.3BDFB761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f00c5f85b82efe9535b18dbf97c4591fb28aeae6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f00c5f85b82efe9535b18dbf97c4591fb28aeae6 Author: Timothy Arceri Date: Fri Feb 27 22:43:39 2015 +1100 glsl: support packing of arrays of arrays Reviewed-by: Ilia Mirkin --- src/glsl/lower_packed_varyings.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp index 2c9a1c4..f8e79bd 100644 --- a/src/glsl/lower_packed_varyings.cpp +++ 
b/src/glsl/lower_packed_varyings.cpp @@ -645,14 +645,8 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var) if (var->data.explicit_location) return false; - const glsl_type *type = var->type; - if (this->gs_input_vertices != 0) { - assert(type->is_array()); - type = type->element_type(); - } - if (type->is_array()) - type = type->fields.array; - if (type->vector_elements == 4 && !type->is_double()) + if (var->type->without_array()->vector_elements == 4 && + !var->type->is_double()) return false; return true; } From tarceri at kemper.freedesktop.org Mon Apr 27 11:39:53 2015 From: tarceri at kemper.freedesktop.org (Timothy Arceri) Date: Mon, 27 Apr 2015 04:39:53 -0700 (PDT) Subject: Mesa (master): glsl: replace while loop with without_array function Message-ID: <20150427113953.492ED761C1@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ca9e280d897a307c485ef4dc7163e1f6900dc768 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ca9e280d897a307c485ef4dc7163e1f6900dc768 Author: Timothy Arceri Date: Mon Aug 18 21:46:44 2014 -1000 glsl: replace while loop with without_array function Signed-off-by: Timothy Arceri Reviewed-by: Ilia Mirkin --- src/glsl/ast_to_hir.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 7836936..18b82e3 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -3532,9 +3532,7 @@ ast_declarator_list::hir(exec_list *instructions, * vectors. Vertex shader inputs cannot be arrays or * structures." 
*/ - const glsl_type *check_type = var->type; - while (check_type->is_array()) - check_type = check_type->element_type(); + const glsl_type *check_type = var->type->without_array(); switch (check_type->base_type) { case GLSL_TYPE_FLOAT: From mareko at kemper.freedesktop.org Mon Apr 27 14:02:31 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Mon, 27 Apr 2015 07:02:31 -0700 (PDT) Subject: Mesa (master): winsys/radeon: make radeon_bo_vtbl static Message-ID: <20150427140231.DA65E761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 96bbdc5188faf4c3b1df6d3a65aeb0daff61e384 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=96bbdc5188faf4c3b1df6d3a65aeb0daff61e384 Author: Marek Olšák Date: Thu Apr 16 19:09:57 2015 +0200 winsys/radeon: make radeon_bo_vtbl static Reviewed-by: Michel Dänzer --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index e609d68..061c814 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -42,7 +42,7 @@ #include #include -extern const struct pb_vtbl radeon_bo_vtbl; +static const struct pb_vtbl radeon_bo_vtbl; static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo) { @@ -471,7 +471,7 @@ static void radeon_bo_fence(struct pb_buffer *buf, { } -const struct pb_vtbl radeon_bo_vtbl = { +static const struct pb_vtbl radeon_bo_vtbl = { radeon_bo_destroy, NULL, /* never called */ NULL, /* never called */ From mareko at kemper.freedesktop.org Mon Apr 27 14:02:31 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Mon, 27 Apr 2015 07:02:31 -0700 (PDT) Subject: Mesa (master): gallium/radeon: print winsys info with R600_DEBUG=info Message-ID: <20150427140231.E267A761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d8269be1ceed29d2e00829e1fd4a7bbac389d967 URL: 
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8269be1ceed29d2e00829e1fd4a7bbac389d967 Author: Marek Olšák Date: Thu Apr 16 20:15:16 2015 +0200 gallium/radeon: print winsys info with R600_DEBUG=info Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeon/r600_pipe_common.c | 27 +++++++++++++++++++++++++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + 2 files changed, 28 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 2b27e0a..c6d7918 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -306,6 +306,7 @@ static const struct debug_named_value common_debug_options[] = { { "compute", DBG_COMPUTE, "Print compute info" }, { "vm", DBG_VM, "Print virtual addresses when creating resources" }, { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_.c file with faulty cs" }, + { "info", DBG_INFO, "Print driver information" }, /* shaders */ { "fs", DBG_FS, "Print fetch shaders" }, @@ -874,6 +875,32 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, } } + if (rscreen->debug_flags & DBG_INFO) { + printf("pci_id = 0x%x\n", rscreen->info.pci_id); + printf("family = %i\n", rscreen->info.family); + printf("chip_class = %i\n", rscreen->info.chip_class); + printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20)); + printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20)); + printf("max_sclk = %i\n", rscreen->info.max_sclk); + printf("max_compute_units = %i\n", rscreen->info.max_compute_units); + printf("max_se = %i\n", rscreen->info.max_se); + printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); + printf("drm = %i.%i.%i\n", rscreen->info.drm_major, + rscreen->info.drm_minor, rscreen->info.drm_patchlevel); + printf("has_uvd = %i\n", rscreen->info.has_uvd); + printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); + printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends); + 
printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq); + printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config); + printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes); + printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes); + printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address); + printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma); + printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map); + printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid); + printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid); + printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid); + } return true; } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index febd2a1..384a9a6 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -84,6 +84,7 @@ #define DBG_SWITCH_ON_EOP (1 << 15) #define DBG_FORCE_DMA (1 << 16) #define DBG_PRECOMPILE (1 << 17) +#define DBG_INFO (1 << 18) /* The maximum allowed bit is 20. 
*/ #define R600_MAP_BUFFER_ALIGNMENT 64 From mareko at kemper.freedesktop.org Mon Apr 27 14:02:31 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Mon, 27 Apr 2015 07:02:31 -0700 (PDT) Subject: Mesa (master): radeonsi: remove deprecated and useless registers Message-ID: <20150427140231.F1421761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bed98eef9a29af39520002e4ac1525ae0e3859cd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bed98eef9a29af39520002e4ac1525ae0e3859cd Author: Marek Olšák Date: Thu Apr 16 20:37:45 2015 +0200 radeonsi: remove deprecated and useless registers Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_state.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4bb6f2b..f24cbbd 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3035,18 +3035,8 @@ void si_init_config(struct si_context *sctx) si_cmd_context_control(pm4); - si_pm4_set_reg(pm4, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0); - si_pm4_set_reg(pm4, R_028A14_VGT_HOS_CNTL, 0x0); si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0); si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0); - si_pm4_set_reg(pm4, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0); - si_pm4_set_reg(pm4, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0); - si_pm4_set_reg(pm4, R_028A28_VGT_GROUP_FIRST_DECR, 0x0); - si_pm4_set_reg(pm4, R_028A2C_VGT_GROUP_DECR, 0x0); - si_pm4_set_reg(pm4, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0); - si_pm4_set_reg(pm4, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0); - si_pm4_set_reg(pm4, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0); - si_pm4_set_reg(pm4, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0); /* FIXME calculate these values somehow ??? 
*/ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80); From mareko at kemper.freedesktop.org Mon Apr 27 14:02:31 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Mon, 27 Apr 2015 07:02:31 -0700 (PDT) Subject: Mesa (master): radeonsi: remove useless includes Message-ID: <20150427140231.EA0C6761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 393b0e05319389ded24b40b81617cbe7e25ab678 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=393b0e05319389ded24b40b81617cbe7e25ab678 Author: Marek Olšák Date: Thu Apr 16 20:16:35 2015 +0200 radeonsi: remove useless includes Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_pipe.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5dc657c..e68c30e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -30,9 +30,6 @@ #include "util/u_memory.h" #include "vl/vl_decoder.h" -#include -#include - /* * pipe_context */ From mareko at kemper.freedesktop.org Mon Apr 27 14:02:32 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Mon, 27 Apr 2015 07:02:32 -0700 (PDT) Subject: Mesa (master): radeonsi: set an optimal value for DB_Z_INFO.ZRANGE_PRECISION Message-ID: <20150427140232.08137761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: db2415189a04fd64106a739c635b1433192ef969 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=db2415189a04fd64106a739c635b1433192ef969 Author: Marek Olšák Date: Thu Apr 16 20:40:31 2015 +0200 radeonsi: set an optimal value for DB_Z_INFO.ZRANGE_PRECISION Required because of a VI hw bug. 
Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_state.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f24cbbd..dc19d29 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1948,12 +1948,6 @@ static void si_init_depth_surface(struct si_context *sctx, z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); - /* This is optimal for the clear value of 1.0 and using - * the LESS and LEQUAL test functions. Set this to 0 - * for the opposite case. This can only be changed when - * clearing. */ - z_info |= S_028040_ZRANGE_PRECISION(1); - /* Use all of the htile_buffer for depth, because we don't * use HTILE for stencil because of FAST_STENCIL_DISABLE. */ s_info |= S_028044_TILE_STENCIL_DISABLE(1); @@ -2183,7 +2177,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom r600_write_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); radeon_emit(cs, zb->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ - radeon_emit(cs, zb->db_z_info); /* R_028040_DB_Z_INFO */ + radeon_emit(cs, zb->db_z_info | /* R_028040_DB_Z_INFO */ + S_028040_ZRANGE_PRECISION(rtex->depth_clear_value != 0)); radeon_emit(cs, zb->db_stencil_info); /* R_028044_DB_STENCIL_INFO */ radeon_emit(cs, zb->db_depth_base); /* R_028048_DB_Z_READ_BASE */ radeon_emit(cs, zb->db_stencil_base); /* R_02804C_DB_STENCIL_READ_BASE */ From jrfonseca at kemper.freedesktop.org Mon Apr 27 14:21:21 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 27 Apr 2015 07:21:21 -0700 (PDT) Subject: Mesa (master): scons: add target osmesa using gallium state tracker.
Message-ID: <20150427142121.C7E46761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f9965347dc0cac8822c44faf7bade09093076c74 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9965347dc0cac8822c44faf7bade09093076c74 Author: Olivier Pena Date: Wed Apr 22 15:36:28 2015 +0000 scons: add target osmesa using gallium state tracker. Reviewed-by: Jose Fonseca --- src/gallium/SConscript | 5 +++ src/gallium/state_trackers/osmesa/Makefile.am | 2 ++ src/gallium/state_trackers/osmesa/SConscript | 26 +++++++++++++++ src/gallium/targets/osmesa/Makefile.am | 6 +++- src/gallium/targets/osmesa/SConscript | 43 +++++++++++++++++++++++++ src/gallium/targets/osmesa/osmesa.def | 16 +++++++++ src/gallium/targets/osmesa/osmesa.mingw.def | 13 ++++++++ 7 files changed, 110 insertions(+), 1 deletion(-) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 680ad92..eeb1c78 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -60,6 +60,11 @@ SConscript([ ]) if not env['embedded']: + SConscript([ + 'state_trackers/osmesa/SConscript', + 'targets/osmesa/SConscript', + ]) + if env['x11']: SConscript([ 'state_trackers/glx/xlib/SConscript', diff --git a/src/gallium/state_trackers/osmesa/Makefile.am b/src/gallium/state_trackers/osmesa/Makefile.am index 4ba6c20..22e65c8 100644 --- a/src/gallium/state_trackers/osmesa/Makefile.am +++ b/src/gallium/state_trackers/osmesa/Makefile.am @@ -39,3 +39,5 @@ AM_CPPFLAGS = \ noinst_LTLIBRARIES = libosmesa.la libosmesa_la_SOURCES = $(C_SOURCES) + +EXTRA_DIST = SConscript diff --git a/src/gallium/state_trackers/osmesa/SConscript b/src/gallium/state_trackers/osmesa/SConscript new file mode 100644 index 0000000..f5519f1 --- /dev/null +++ b/src/gallium/state_trackers/osmesa/SConscript @@ -0,0 +1,26 @@ +import os + +Import('*') + +env = env.Clone() + +env.Append(CPPPATH = [ + '#src/mapi', + '#src/mesa', + '.', +]) + +if env['platform'] == 'windows': + env.AppendUnique(CPPDEFINES = [ + 'BUILD_GL32', # declare gl* as 
__declspec(dllexport) in Mesa headers + 'WIN32_LEAN_AND_MEAN', # http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx + ]) + if not env['gles']: + # prevent _glapi_* from being declared __declspec(dllimport) + env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) + +st_osmesa = env.ConvenienceLibrary( + target ='st_osmesa', + source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'), +) +Export('st_osmesa') diff --git a/src/gallium/targets/osmesa/Makefile.am b/src/gallium/targets/osmesa/Makefile.am index f53823a..2c09736 100644 --- a/src/gallium/targets/osmesa/Makefile.am +++ b/src/gallium/targets/osmesa/Makefile.am @@ -76,7 +76,11 @@ lib at OSMESA_LIB@_la_LIBADD += $(top_builddir)/src/gallium/drivers/llvmpipe/libllv endif EXTRA_lib at OSMESA_LIB@_la_DEPENDENCIES = osmesa.sym -EXTRA_DIST = osmesa.sym +EXTRA_DIST = \ + osmesa.sym \ + osmesa.def \ + osmesa.mingw.def \ + SConscript include $(top_srcdir)/install-gallium-links.mk diff --git a/src/gallium/targets/osmesa/SConscript b/src/gallium/targets/osmesa/SConscript new file mode 100644 index 0000000..4a9115b --- /dev/null +++ b/src/gallium/targets/osmesa/SConscript @@ -0,0 +1,43 @@ +Import('*') + +env = env.Clone() + +env.Prepend(CPPPATH = [ + '#src/mapi', + '#src/mesa', + #Dir('../../../mapi'), # src/mapi build path for python-generated GL API files/headers +]) + +env.Prepend(LIBS = [ + st_osmesa, + ws_null, + glapi, + mesa, + gallium, + trace, + glsl, + mesautil, + softpipe +]) + +env.Append(CPPDEFINES = ['GALLIUM_TRACE', 'GALLIUM_SOFTPIPE']) + +sources = ['target.c'] + +if env['llvm']: + env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') + env.Prepend(LIBS = [llvmpipe]) + +if env['platform'] == 'windows': + if env['gcc'] and env['machine'] != 'x86_64': + sources += ['osmesa.mingw.def'] + else: + sources += ['osmesa.def'] + +gallium_osmesa = env.SharedLibrary( + target ='osmesa', + source = sources, + LIBS = env['LIBS'], +) + +env.Alias('osmesa', gallium_osmesa) diff --git a/src/gallium/targets/osmesa/osmesa.def 
b/src/gallium/targets/osmesa/osmesa.def new file mode 100644 index 0000000..e2a31ab --- /dev/null +++ b/src/gallium/targets/osmesa/osmesa.def @@ -0,0 +1,16 @@ +;DESCRIPTION 'Mesa OSMesa lib for Win32' +VERSION 4.1 + +EXPORTS + OSMesaCreateContext + OSMesaCreateContextExt + OSMesaDestroyContext + OSMesaMakeCurrent + OSMesaGetCurrentContext + OSMesaPixelStore + OSMesaGetIntegerv + OSMesaGetDepthBuffer + OSMesaGetColorBuffer + OSMesaGetProcAddress + OSMesaColorClamp + OSMesaPostprocess diff --git a/src/gallium/targets/osmesa/osmesa.mingw.def b/src/gallium/targets/osmesa/osmesa.mingw.def new file mode 100644 index 0000000..874ac54 --- /dev/null +++ b/src/gallium/targets/osmesa/osmesa.mingw.def @@ -0,0 +1,13 @@ +EXPORTS + OSMesaCreateContext = OSMesaCreateContext at 8 + OSMesaCreateContextExt = OSMesaCreateContextExt at 20 + OSMesaDestroyContext = OSMesaDestroyContext at 4 + OSMesaMakeCurrent = OSMesaMakeCurrent at 20 + OSMesaGetCurrentContext = OSMesaGetCurrentContext at 0 + OSMesaPixelStore = OSMesaPixelStore at 8 + OSMesaGetIntegerv = OSMesaGetIntegerv at 8 + OSMesaGetDepthBuffer = OSMesaGetDepthBuffer at 20 + OSMesaGetColorBuffer = OSMesaGetColorBuffer at 20 + OSMesaGetProcAddress = OSMesaGetProcAddress at 4 + OSMesaColorClamp = OSMesaColorClamp at 4 + OSMesaPostprocess = OSMesaPostprocess at 12 From currojerez at kemper.freedesktop.org Mon Apr 27 15:58:54 2015 From: currojerez at kemper.freedesktop.org (Francisco Jerez) Date: Mon, 27 Apr 2015 08:58:54 -0700 (PDT) Subject: Mesa (master): i965: Add helper functions to calculate the slice pitch of an array or 3D miptree. Message-ID: <20150427155854.B721B761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e94c80c08b1744fb06dcdd083b92b97971e03892 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e94c80c08b1744fb06dcdd083b92b97971e03892 Author: Francisco Jerez Date: Wed Apr 22 21:32:49 2015 +0300 i965: Add helper functions to calculate the slice pitch of an array or 3D miptree. 
--- src/mesa/drivers/dri/i965/brw_tex_layout.c | 106 +++++++++++++++---------- src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 18 +++++ 2 files changed, 84 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 440ba6c..d1ac3ed 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -263,6 +263,66 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) } } +unsigned +brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + unsigned level) +{ + assert(brw->gen < 9); + + if (mt->target == GL_TEXTURE_3D || + (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) { + return ALIGN(minify(mt->physical_width0, level), mt->align_w); + } else { + return 0; + } +} + +unsigned +brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + unsigned level) +{ + if (brw->gen >= 9) { + /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will + * effectively end up with a packed qpitch anyway whenever + * mt->first_level == mt->last_level. + */ + assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD); + + /* On Gen9 we can pick whatever qpitch we like as long as it's aligned + * to the vertical alignment so we don't need to add any extra rows. + */ + unsigned qpitch = mt->total_height; + + /* If the surface might be used as a stencil buffer or HiZ buffer then + * it needs to be a multiple of 8. + */ + const GLenum base_format = _mesa_get_format_base_format(mt->format); + if (_mesa_is_depth_or_stencil_format(base_format)) + qpitch = ALIGN(qpitch, 8); + + /* 3D textures need to be aligned to the tile height. 
At this point we + * don't know which tiling will be used so let's just align it to 32 + */ + if (mt->target == GL_TEXTURE_3D) + qpitch = ALIGN(qpitch, 32); + + return qpitch; + + } else if (mt->target == GL_TEXTURE_3D || + (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) || + mt->array_layout == ALL_SLICES_AT_EACH_LOD) { + return ALIGN(minify(mt->physical_height0, level), mt->align_h); + + } else { + const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h); + const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h); + + return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h; + } +} + static void align_cube(struct intel_mipmap_tree *mt) { @@ -318,47 +378,13 @@ brw_miptree_layout_texture_array(struct brw_context *brw, * this case it's always 64). The vertical alignment is ignored. */ mt->qpitch = mt->total_width; - } else if (brw->gen >= 9) { - GLenum base_format; - - /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will - * effectively end up with a packed qpitch anyway whenever - * mt->first_level == mt->last_level. - */ - assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD); - - /* On Gen9 we can pick whatever qpitch we like as long as it's aligned - * to the vertical alignment so we don't need to add any extra rows. - */ - mt->qpitch = mt->total_height; - - /* If the surface might be used as a stencil buffer or HiZ buffer then - * it needs to be a multiple of 8. - */ - base_format = _mesa_get_format_base_format(mt->format); - if (_mesa_is_depth_or_stencil_format(base_format)) - mt->qpitch = ALIGN(mt->qpitch, 8); - - /* 3D textures need to be aligned to the tile height. At this point we - * don't know which tiling will be used so let's just align it to 32 - */ - if (mt->target == GL_TEXTURE_3D) - mt->qpitch = ALIGN(mt->qpitch, 32); - - /* Unlike previous generations the qpitch is now a multiple of the - * compressed block size so physical_qpitch matches mt->qpitch. 
- */ - physical_qpitch = mt->qpitch; } else { - int h0 = ALIGN(mt->physical_height0, mt->align_h); - int h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h); - - if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) - mt->qpitch = h0; - else - mt->qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h); - - physical_qpitch = mt->compressed ? mt->qpitch / 4 : mt->qpitch; + mt->qpitch = brw_miptree_get_vertical_slice_pitch(brw, mt, 0); + /* Unlike previous generations the qpitch is a multiple of the + * compressed block size on Gen9 so physical_qpitch matches mt->qpitch. + */ + physical_qpitch = (mt->compressed && brw->gen < 9 ? mt->qpitch / 4 : + mt->qpitch); } for (unsigned level = mt->first_level; level <= mt->last_level; level++) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 77b0294..8b42e4a 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -735,6 +735,24 @@ intel_miptree_updownsample(struct brw_context *brw, struct intel_mipmap_tree *src, struct intel_mipmap_tree *dst); +/** + * Horizontal distance from one slice to the next in the two-dimensional + * miptree layout. + */ +unsigned +brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + unsigned level); + +/** + * Vertical distance from one slice to the next in the two-dimensional miptree + * layout. 
+ */ +unsigned +brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, + const struct intel_mipmap_tree *mt, + unsigned level); + void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt); void *intel_miptree_map_raw(struct brw_context *brw, From currojerez at kemper.freedesktop.org Mon Apr 27 15:58:54 2015 From: currojerez at kemper.freedesktop.org (Francisco Jerez) Date: Mon, 27 Apr 2015 08:58:54 -0700 (PDT) Subject: Mesa (master): i965/gen7: Factor out texture surface state set-up from gen7_update_texture_surface( ). Message-ID: <20150427155854.BE9AB761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6f26ffaf66affb6563e548646fb6d6a049fe6bff URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f26ffaf66affb6563e548646fb6d6a049fe6bff Author: Francisco Jerez Date: Mon Apr 13 21:37:02 2015 +0300 i965/gen7: Factor out texture surface state set-up from gen7_update_texture_surface(). This moves most of the surface state set-up logic that can be shared between textures and shader images to a separate function. 
--- src/mesa/drivers/dri/i965/brw_context.h | 11 ++ src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 127 ++++++++++++--------- 2 files changed, 84 insertions(+), 54 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index f79729b..d4b7b55 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -972,6 +972,17 @@ struct brw_context bool layered, unsigned unit); + void (*emit_texture_surface_state)(struct brw_context *brw, + struct intel_mipmap_tree *mt, + GLenum target, + unsigned min_layer, + unsigned max_layer, + unsigned min_level, + unsigned max_level, + unsigned format, + unsigned swizzle, + uint32_t *surf_offset, + bool rw, bool for_gather); void (*emit_buffer_surface_state)(struct brw_context *brw, uint32_t *out_offset, drm_intel_bo *bo, diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 10567f3..4b8503c 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -264,40 +264,28 @@ gen7_emit_buffer_surface_state(struct brw_context *brw, } static void -gen7_update_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset, - bool for_gather) +gen7_emit_texture_surface_state(struct brw_context *brw, + struct intel_mipmap_tree *mt, + GLenum target, + unsigned min_layer, unsigned max_layer, + unsigned min_level, unsigned max_level, + unsigned format, + unsigned swizzle, + uint32_t *surf_offset, + bool rw, bool for_gather) { - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct intel_mipmap_tree *mt = intelObj->mt; - struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - if 
(tObj->Target == GL_TEXTURE_BUFFER) { - brw_update_buffer_texture_surface(ctx, unit, surf_offset); - return; - } - + const unsigned depth = max_layer - min_layer; uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, surf_offset); - memset(surf, 0, 8 * 4); - - uint32_t tex_format = translate_tex_format(brw, - intelObj->_Format, - sampler->sRGBDecode); - if (for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT) - tex_format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD; + memset(surf, 0, 8 * 4); - surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | - tex_format << BRW_SURFACE_FORMAT_SHIFT | + surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | + format << BRW_SURFACE_FORMAT_SHIFT | gen7_surface_tiling_mode(mt->tiling); /* mask of faces present in cube map; for other surfaces MBZ. */ - if (tObj->Target == GL_TEXTURE_CUBE_MAP || tObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) + if (target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY) surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; if (mt->align_h == 4) @@ -305,16 +293,9 @@ gen7_update_texture_surface(struct gl_context *ctx, if (mt->align_w == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; - if (_mesa_is_array_texture(tObj->Target) || - tObj->Target == GL_TEXTURE_CUBE_MAP) + if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP) surf[0] |= GEN7_SURFACE_IS_ARRAY; - /* if this is a view with restricted NumLayers, then - * our effective depth is not just the miptree depth. - */ - uint32_t effective_depth = (tObj->Immutable && tObj->Target != GL_TEXTURE_3D) - ? 
tObj->NumLayers : mt->logical_depth0; - if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) surf[0] |= GEN7_SURFACE_ARYSPC_LOD0; @@ -323,37 +304,25 @@ gen7_update_texture_surface(struct gl_context *ctx, surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); - surf[3] = SET_FIELD(effective_depth - 1, BRW_SURFACE_DEPTH) | + surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | (mt->pitch - 1); - if (brw->is_haswell && tObj->_IsIntegerFormat) + if (brw->is_haswell && _mesa_is_format_integer(mt->format)) surf[3] |= HSW_SURFACE_IS_INTEGER_FORMAT; surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) | - SET_FIELD(tObj->MinLayer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) | - SET_FIELD((effective_depth - 1), - GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT); + SET_FIELD(min_layer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) | + SET_FIELD(depth - 1, GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT); surf[5] = (SET_FIELD(GEN7_MOCS_L3, GEN7_SURFACE_MOCS) | - SET_FIELD(tObj->MinLevel + tObj->BaseLevel - mt->first_level, GEN7_SURFACE_MIN_LOD) | + SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) | /* mip count */ - (intelObj->_MaxLevel - tObj->BaseLevel)); + (max_level - min_level - 1)); surf[7] = mt->fast_clear_color_value; if (brw->is_haswell) { - /* Handling GL_ALPHA as a surface format override breaks 1.30+ style - * texturing functions that return a float, as our code generation always - * selects the .x channel (which would always be 0). - */ - const bool alpha_depth = tObj->DepthMode == GL_ALPHA && - (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || - firstImage->_BaseFormat == GL_DEPTH_STENCIL); - - const int swizzle = unlikely(alpha_depth) - ? 
SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj); - - const bool need_scs_green_to_blue = for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD; + const bool need_scs_green_to_blue = for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD; surf[7] |= SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) | @@ -372,11 +341,60 @@ gen7_update_texture_surface(struct gl_context *ctx, *surf_offset + 4, mt->bo, surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_SAMPLER, 0); + I915_GEM_DOMAIN_SAMPLER, + (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); gen7_check_surface_setup(surf, false /* is_render_target */); } +static void +gen7_update_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *surf_offset, + bool for_gather) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; + + if (obj->Target == GL_TEXTURE_BUFFER) { + brw_update_buffer_texture_surface(ctx, unit, surf_offset); + + } else { + struct intel_texture_object *intel_obj = intel_texture_object(obj); + struct intel_mipmap_tree *mt = intel_obj->mt; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + /* If this is a view with restricted NumLayers, then our effective depth + * is not just the miptree depth. + */ + const unsigned depth = (obj->Immutable && obj->Target != GL_TEXTURE_3D ? + obj->NumLayers : mt->logical_depth0); + + /* Handling GL_ALPHA as a surface format override breaks 1.30+ style + * texturing functions that return a float, as our code generation always + * selects the .x channel (which would always be 0). + */ + struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel]; + const bool alpha_depth = obj->DepthMode == GL_ALPHA && + (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || + firstImage->_BaseFormat == GL_DEPTH_STENCIL); + const unsigned swizzle = (unlikely(alpha_depth) ? 
SWIZZLE_XYZW : + brw_get_texture_swizzle(&brw->ctx, obj)); + + unsigned format = translate_tex_format( + brw, intel_obj->_Format, sampler->sRGBDecode); + + if (for_gather && format == BRW_SURFACEFORMAT_R32G32_FLOAT) + format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD; + + gen7_emit_texture_surface_state(brw, mt, obj->Target, + obj->MinLayer, obj->MinLayer + depth, + obj->MinLevel + obj->BaseLevel, + obj->MinLevel + intel_obj->_MaxLevel + 1, + format, swizzle, + surf_offset, false, for_gather); + } +} + /** * Creates a null surface. * @@ -550,5 +568,6 @@ gen7_init_vtable_surface_functions(struct brw_context *brw) brw->vtbl.update_texture_surface = gen7_update_texture_surface; brw->vtbl.update_renderbuffer_surface = gen7_update_renderbuffer_surface; brw->vtbl.emit_null_surface_state = gen7_emit_null_surface_state; + brw->vtbl.emit_texture_surface_state = gen7_emit_texture_surface_state; brw->vtbl.emit_buffer_surface_state = gen7_emit_buffer_surface_state; } From currojerez at kemper.freedesktop.org Mon Apr 27 15:58:55 2015 From: currojerez at kemper.freedesktop.org (Francisco Jerez) Date: Mon, 27 Apr 2015 08:58:55 -0700 (PDT) Subject: Mesa (master): Fix a few typos Message-ID: <20150427155855.09C79761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 05e7f7f4388bde882b7ce74124000a4d435affff URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=05e7f7f4388bde882b7ce74124000a4d435affff Author: Zoë
Blade Date: Wed Apr 22 11:33:17 2015 +0100 Fix a few typos Reviewed-by: Francisco Jerez --- docs/VERSIONS | 4 ++-- docs/index.html | 2 +- docs/libGL.txt | 2 +- docs/relnotes/10.0.3.html | 2 +- docs/relnotes/7.10.html | 2 +- docs/versions.html | 4 ++-- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- src/gallium/auxiliary/rbug/rbug_connection.c | 2 +- src/gallium/drivers/i915/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915/i915_resource_texture.c | 2 +- src/gallium/drivers/i915/i915_state_dynamic.c | 2 +- src/gallium/drivers/i915/i915_state_immediate.c | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 2 +- src/gallium/drivers/llvmpipe/lp_scene.h | 2 +- src/gallium/drivers/r300/compiler/memory_pool.h | 2 +- src/gallium/drivers/r300/compiler/radeon_variable.c | 2 +- src/gallium/drivers/r300/r300_emit.c | 2 +- src/gallium/drivers/r300/r300_reg.h | 2 +- src/gallium/drivers/r300/r300_tgsi_to_rc.h | 2 +- src/gallium/drivers/r600/evergreen_hw_context.c | 2 +- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_hw_context.c | 2 +- src/gallium/drivers/r600/r600_state.c | 2 +- src/gallium/drivers/r600/r600_state_common.c | 4 ++-- src/gallium/drivers/vc4/vc4_program.c | 2 +- src/gallium/state_trackers/glx/xlib/xm_api.c | 2 +- src/gallium/state_trackers/nine/swapchain9.c | 2 +- src/gallium/state_trackers/vdpau/device.c | 2 +- src/gallium/state_trackers/wgl/stw_framebuffer.h | 2 +- src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c | 4 ++-- src/glsl/ast_function.cpp | 2 +- src/glsl/glcpp/glcpp-parse.y | 2 +- src/glsl/link_uniforms.cpp | 2 +- src/glsl/lower_packing_builtins.cpp | 8 ++++---- src/glsl/nir/nir_from_ssa.c | 2 +- src/glsl/opt_dead_builtin_varyings.cpp | 8 ++++---- src/glsl/opt_function_inlining.cpp | 2 +- src/glx/XF86dri.c | 2 +- src/glx/apple/apple_glx.c | 2 +- src/glx/apple/apple_glx_context.c | 2 +- src/glx/apple/apple_glx_surface.c | 4 ++-- src/glx/glxcmds.c | 2 +- src/glx/indirect_vertex_array.c | 2 +- src/glx/single2.c | 
10 +++++----- src/gtest/include/gtest/internal/gtest-filepath.h | 2 +- src/gtest/src/gtest-filepath.cc | 2 +- src/loader/loader.h | 2 +- src/mapi/glapi/gen/glX_proto_send.py | 2 +- src/mapi/glapi/gen/glX_proto_size.py | 2 +- src/mapi/glapi/gen/gl_x86-64_asm.py | 2 +- src/mesa/drivers/dri/i915/i830_state.c | 6 +++--- src/mesa/drivers/dri/i915/i915_state.c | 4 ++-- src/mesa/drivers/dri/i915/intel_context.h | 2 +- src/mesa/drivers/dri/i915/intel_tex_layout.c | 2 +- src/mesa/drivers/dri/i965/brw_clip_unfilled.c | 6 +++--- src/mesa/drivers/dri/i965/brw_clip_util.c | 4 ++-- src/mesa/drivers/dri/i965/brw_context.c | 4 ++-- src/mesa/drivers/dri/i965/brw_context.h | 4 ++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 4 ++-- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_sol_state.c | 2 +- src/mesa/drivers/dri/r200/r200_ioctl.h | 2 +- src/mesa/drivers/dri/r200/r200_texstate.c | 2 +- src/mesa/drivers/dri/radeon/radeon_common_context.h | 2 +- src/mesa/drivers/dri/radeon/radeon_ioctl.h | 2 +- src/mesa/drivers/x11/xm_api.c | 2 +- src/mesa/main/context.c | 2 +- src/mesa/main/ffvertex_prog.c | 2 +- src/mesa/main/formats.c | 2 +- src/mesa/math/m_matrix.c | 2 +- src/mesa/program/hash_table.h | 4 ++-- src/mesa/program/prog_cache.c | 2 +- src/mesa/state_tracker/st_cb_flush.c | 2 +- src/mesa/swrast/s_texcombine.c | 2 +- src/mesa/tnl/t_vertex.h | 2 +- src/mesa/vbo/vbo_exec_api.c | 2 +- src/mesa/vbo/vbo_save_draw.c | 2 +- 78 files changed, 103 insertions(+), 103 deletions(-) Diff: http://cgit.freedesktop.org/mesa/mesa/diff/?id=05e7f7f4388bde882b7ce74124000a4d435affff From currojerez at kemper.freedesktop.org Mon Apr 27 15:58:54 2015 From: currojerez at kemper.freedesktop.org (Francisco Jerez) Date: Mon, 27 Apr 2015 08:58:54 -0700 (PDT) Subject: Mesa (master): i965/gen8: Factor out texture surface state set-up from gen8_update_texture_surface( ). 
Message-ID: <20150427155854.C6D9C761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e17dc004fd96e589e92ee64deeb45339af4bf671 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e17dc004fd96e589e92ee64deeb45339af4bf671 Author: Francisco Jerez Date: Mon Apr 13 21:38:06 2015 +0300 i965/gen8: Factor out texture surface state set-up from gen8_update_texture_surface(). This moves most of the surface state set-up logic that can be shared between textures and shader images to a separate function. --- src/mesa/drivers/dri/i965/gen8_surface_state.c | 137 +++++++++++++----------- 1 file changed, 77 insertions(+), 60 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 011c685..b8ef353 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -150,34 +150,23 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, } static void -gen8_update_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset, - bool for_gather) +gen8_emit_texture_surface_state(struct brw_context *brw, + struct intel_mipmap_tree *mt, + GLenum target, + unsigned min_layer, unsigned max_layer, + unsigned min_level, unsigned max_level, + unsigned format, + unsigned swizzle, + uint32_t *surf_offset, + bool rw, bool for_gather) { - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct intel_mipmap_tree *mt = intelObj->mt; - struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; - struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + const unsigned depth = max_layer - min_layer; struct intel_mipmap_tree *aux_mt = NULL; uint32_t aux_mode = 0; - mesa_format format = intelObj->_Format; uint32_t mocs_wb = brw->gen >= 9 ? 
SKL_MOCS_WB : BDW_MOCS_WB; - - if (tObj->Target == GL_TEXTURE_BUFFER) { - brw_update_buffer_texture_surface(ctx, unit, surf_offset); - return; - } - - if (tObj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { - mt = mt->stencil_mt; - format = MESA_FORMAT_S_UINT8; - } - unsigned tiling_mode, pitch; - if (format == MESA_FORMAT_S_UINT8) { + + if (mt->format == MESA_FORMAT_S_UINT8) { tiling_mode = GEN8_SURFACE_TILING_W; pitch = 2 * mt->pitch; } else { @@ -190,30 +179,20 @@ gen8_update_texture_surface(struct gl_context *ctx, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - /* If this is a view with restricted NumLayers, then our effective depth - * is not just the miptree depth. - */ - uint32_t effective_depth = - (tObj->Immutable && tObj->Target != GL_TEXTURE_3D) ? tObj->NumLayers - : mt->logical_depth0; - - uint32_t tex_format = translate_tex_format(brw, format, sampler->sRGBDecode); - uint32_t *surf = allocate_surface_state(brw, surf_offset); - surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT | - tex_format << BRW_SURFACE_FORMAT_SHIFT | + surf[0] = translate_tex_target(target) << BRW_SURFACE_TYPE_SHIFT | + format << BRW_SURFACE_FORMAT_SHIFT | vertical_alignment(mt) | horizontal_alignment(mt) | tiling_mode; - if (tObj->Target == GL_TEXTURE_CUBE_MAP || - tObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { + if (target == GL_TEXTURE_CUBE_MAP || + target == GL_TEXTURE_CUBE_MAP_ARRAY) { surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; } - if (_mesa_is_array_texture(tObj->Target) || - tObj->Target == GL_TEXTURE_CUBE_MAP) + if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP) surf[0] |= GEN8_SURFACE_IS_ARRAY; surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; @@ -221,16 +200,14 @@ gen8_update_texture_surface(struct gl_context *ctx, surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) | SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT); - surf[3] = SET_FIELD(effective_depth - 1, BRW_SURFACE_DEPTH) | (pitch - 
1); + surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1); surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) | - SET_FIELD(tObj->MinLayer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) | - SET_FIELD(effective_depth - 1, - GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT); + SET_FIELD(min_layer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) | + SET_FIELD(depth - 1, GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT); - surf[5] = SET_FIELD(tObj->MinLevel + tObj->BaseLevel - mt->first_level, - GEN7_SURFACE_MIN_LOD) | - (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */ + surf[5] = SET_FIELD(min_level - mt->first_level, GEN7_SURFACE_MIN_LOD) | + (max_level - min_level - 1); /* mip count */ if (aux_mt) { surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | @@ -240,19 +217,7 @@ gen8_update_texture_surface(struct gl_context *ctx, surf[6] = 0; } - /* Handling GL_ALPHA as a surface format override breaks 1.30+ style - * texturing functions that return a float, as our code generation always - * selects the .x channel (which would always be 0). - */ - const bool alpha_depth = tObj->DepthMode == GL_ALPHA && - (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || - firstImage->_BaseFormat == GL_DEPTH_STENCIL); - - surf[7] = mt->fast_clear_color_value; - - const int swizzle = - unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj); - surf[7] |= + surf[7] = mt->fast_clear_color_value | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) | @@ -264,7 +229,8 @@ gen8_update_texture_surface(struct gl_context *ctx, *((uint64_t *) &surf[10]) = aux_mt->bo->offset64; drm_intel_bo_emit_reloc(brw->batch.bo, *surf_offset + 10 * 4, aux_mt->bo, 0, - I915_GEM_DOMAIN_SAMPLER, 0); + I915_GEM_DOMAIN_SAMPLER, + (rw ? 
I915_GEM_DOMAIN_SAMPLER : 0)); } else { surf[10] = 0; surf[11] = 0; @@ -276,7 +242,57 @@ gen8_update_texture_surface(struct gl_context *ctx, *surf_offset + 8 * 4, mt->bo, mt->offset, - I915_GEM_DOMAIN_SAMPLER, 0); + I915_GEM_DOMAIN_SAMPLER, + (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); +} + +static void +gen8_update_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *surf_offset, + bool for_gather) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current; + + if (obj->Target == GL_TEXTURE_BUFFER) { + brw_update_buffer_texture_surface(ctx, unit, surf_offset); + + } else { + struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel]; + struct intel_texture_object *intel_obj = intel_texture_object(obj); + struct intel_mipmap_tree *mt = intel_obj->mt; + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + /* If this is a view with restricted NumLayers, then our effective depth + * is not just the miptree depth. + */ + const unsigned depth = (obj->Immutable && obj->Target != GL_TEXTURE_3D ? + obj->NumLayers : mt->logical_depth0); + + /* Handling GL_ALPHA as a surface format override breaks 1.30+ style + * texturing functions that return a float, as our code generation always + * selects the .x channel (which would always be 0). + */ + const bool alpha_depth = obj->DepthMode == GL_ALPHA && + (firstImage->_BaseFormat == GL_DEPTH_COMPONENT || + firstImage->_BaseFormat == GL_DEPTH_STENCIL); + const unsigned swizzle = (unlikely(alpha_depth) ? 
SWIZZLE_XYZW : + brw_get_texture_swizzle(&brw->ctx, obj)); + + unsigned format = translate_tex_format(brw, intel_obj->_Format, + sampler->sRGBDecode); + if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) { + mt = mt->stencil_mt; + format = BRW_SURFACEFORMAT_R8_UINT; + } + + gen8_emit_texture_surface_state(brw, mt, obj->Target, + obj->MinLayer, obj->MinLayer + depth, + obj->MinLevel + obj->BaseLevel, + obj->MinLevel + intel_obj->_MaxLevel + 1, + format, swizzle, surf_offset, + false, for_gather); + } } /** @@ -446,5 +462,6 @@ gen8_init_vtable_surface_functions(struct brw_context *brw) brw->vtbl.update_texture_surface = gen8_update_texture_surface; brw->vtbl.update_renderbuffer_surface = gen8_update_renderbuffer_surface; brw->vtbl.emit_null_surface_state = gen8_emit_null_surface_state; + brw->vtbl.emit_texture_surface_state = gen8_emit_texture_surface_state; brw->vtbl.emit_buffer_surface_state = gen8_emit_buffer_surface_state; } From imirkin at kemper.freedesktop.org Mon Apr 27 16:07:54 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 09:07:54 -0700 (PDT) Subject: Mesa (master): mesa: the function name appears to have a gl prefix already Message-ID: <20150427160754.351C0761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dfb274af4c6e0991fa20af1606e45bea6f947fed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfb274af4c6e0991fa20af1606e45bea6f947fed Author: Ilia Mirkin Date: Fri Apr 24 19:33:05 2015 -0400 mesa: the function name appears to have a gl prefix already Currently we're producing errors like User error: GL_INVALID_OPERATION in glglDeleteProgramsARB(invalid call) And noop_warn appears to be called with the full function name. Don't prepend a gl prefix. 
Signed-off-by: Ilia Mirkin Reviewed-by: Brian Paul --- src/mesa/main/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index f36fb8d..4aaf8b1 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -894,14 +894,14 @@ update_default_objects(struct gl_context *ctx) * If there's no current OpenGL context for the calling thread, we can * print a message to stderr. * - * \param name the name of the OpenGL function, without the "gl" prefix + * \param name the name of the OpenGL function */ static void nop_handler(const char *name) { GET_CURRENT_CONTEXT(ctx); if (ctx) { - _mesa_error(ctx, GL_INVALID_OPERATION, "gl%s(invalid call)", name); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid call)", name); } #if defined(DEBUG) else if (getenv("MESA_DEBUG") || getenv("LIBGL_DEBUG")) { From sroland at kemper.freedesktop.org Mon Apr 27 16:53:04 2015 From: sroland at kemper.freedesktop.org (Roland Scheidegger) Date: Mon, 27 Apr 2015 09:53:04 -0700 (PDT) Subject: Mesa (master): softpipe: fix another stencil-as-float issue Message-ID: <20150427165304.75241761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7c3d1c132eea83fb257cbc473a8a79638ddc3014 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c3d1c132eea83fb257cbc473a8a79638ddc3014 Author: Roland Scheidegger Date: Sat Apr 25 22:10:42 2015 +0200 softpipe: fix another stencil-as-float issue Hopefully this is the last one now (for texture X32_S8X24_UINT views). +4 piglits. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90167 Reviewed-by: Brian Paul --- src/gallium/auxiliary/util/u_tile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 8e19920..b91bb41 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -341,13 +341,13 @@ x32_s8_get_tile_rgba(const unsigned *src, unsigned i, j; for (i = 0; i < h; i++) { - float *pRow = p; + uint32_t *pRow = (uint32_t *)p; for (j = 0; j < w; j++, pRow += 4) { src++; pRow[0] = pRow[1] = pRow[2] = - pRow[3] = (float)(*src++ & 0xff); + pRow[3] = (*src++ & 0xff); } p += dst_stride; } From imirkin at kemper.freedesktop.org Mon Apr 27 20:04:16 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 13:04:16 -0700 (PDT) Subject: Mesa (master): mesa: fix up GLSL version when computing GL version Message-ID: <20150427200416.48B0C761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dfb0b36e8ff93873920495647381b3faf30df028 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfb0b36e8ff93873920495647381b3faf30df028 Author: Ilia Mirkin Date: Mon Apr 27 14:00:44 2015 -0400 mesa: fix up GLSL version when computing GL version In some situations it is convenient for a driver to expose a higher GLSL version while some extensions are still incomplete. However in that situation, it would report a GLSL version that was higher than the GL version. Avoid that situation by limiting the GLSL version to the GL version. 
Signed-off-by: Ilia Mirkin Reviewed-by: Brian Paul --- src/mesa/main/version.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index a65ace0..5b8ac0a 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -483,6 +483,23 @@ _mesa_compute_version(struct gl_context *ctx) ctx->Version = _mesa_get_version(&ctx->Extensions, &ctx->Const, ctx->API); + /* Make sure that the GLSL version lines up with the GL version. In some + * cases it can be too high, e.g. if an extension is missing. + */ + if (ctx->API == API_OPENGL_CORE) { + switch (ctx->Version) { + case 31: + ctx->Const.GLSLVersion = 140; + break; + case 32: + ctx->Const.GLSLVersion = 150; + break; + default: + ctx->Const.GLSLVersion = ctx->Version * 10; + break; + } + } + switch (ctx->API) { case API_OPENGL_COMPAT: case API_OPENGL_CORE: From jrfonseca at kemper.freedesktop.org Mon Apr 27 20:47:32 2015 From: jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Mon, 27 Apr 2015 13:47:32 -0700 (PDT) Subject: Mesa (master): scons: Support LLVM 3.5 and 3.6 on windows. Message-ID: <20150427204732.C4A92761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b94a4e84987f6bdefabf45527ec434d8510da2b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b94a4e84987f6bdefabf45527ec434d8510da2b8 Author: Olivier Pena Date: Mon Apr 27 10:23:58 2015 +0000 scons: Support LLVM 3.5 and 3.6 on windows. llvm/Config/llvm-config.h is parsed instead of llvm/Config/config.h for detecting LLVM version (http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-June/073707.html). 
Reviewed-by: Jose Fonseca --- scons/llvm.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/scons/llvm.py b/scons/llvm.py index be7df9f..17278df 100644 --- a/scons/llvm.py +++ b/scons/llvm.py @@ -72,18 +72,25 @@ def generate(env): return # Try to determine the LLVM version from llvm/Config/config.h - llvm_config = os.path.join(llvm_dir, 'include/llvm/Config/config.h') + llvm_config = os.path.join(llvm_dir, 'include/llvm/Config/llvm-config.h') if not os.path.exists(llvm_config): print 'scons: could not find %s' % llvm_config return - llvm_version_re = re.compile(r'^#define PACKAGE_VERSION "([^"]*)"') + llvm_version_major_re = re.compile(r'^#define LLVM_VERSION_MAJOR ([0-9]+)') + llvm_version_minor_re = re.compile(r'^#define LLVM_VERSION_MINOR ([0-9]+)') llvm_version = None + llvm_version_major = None + llvm_version_minor = None for line in open(llvm_config, 'rt'): - mo = llvm_version_re.match(line) + mo = llvm_version_major_re.match(line) if mo: - llvm_version = mo.group(1) - llvm_version = distutils.version.LooseVersion(llvm_version) - break + llvm_version_major = mo.group(1) + mo = llvm_version_minor_re.match(line) + if mo: + llvm_version_minor = mo.group(1) + if llvm_version_major is not None and llvm_version_minor is not None: + llvm_version = distutils.version.LooseVersion('%s.%s' % (llvm_version_major, llvm_version_minor)) + if llvm_version is None: print 'scons: could not determine the LLVM version from %s' % llvm_config return @@ -99,7 +106,19 @@ def generate(env): ]) env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')]) # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter` - if llvm_version >= distutils.version.LooseVersion('3.5'): + if llvm_version >= distutils.version.LooseVersion('3.6'): + env.Prepend(LIBS = [ + 'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser', + 'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter', + 'LLVMCodeGen', 'LLVMScalarOpts', 
'LLVMProfileData', + 'LLVMInstCombine', 'LLVMTransformUtils', 'LLVMipa', + 'LLVMAnalysis', 'LLVMX86Desc', 'LLVMMCDisassembler', + 'LLVMX86Info', 'LLVMX86AsmPrinter', 'LLVMX86Utils', + 'LLVMMCJIT', 'LLVMTarget', 'LLVMExecutionEngine', + 'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser', + 'LLVMBitReader', 'LLVMMC', 'LLVMCore', 'LLVMSupport' + ]) + elif llvm_version >= distutils.version.LooseVersion('3.5'): env.Prepend(LIBS = [ 'LLVMBitWriter', 'LLVMMCJIT', 'LLVMRuntimeDyld', 'LLVMX86Disassembler', 'LLVMX86AsmParser', 'LLVMX86CodeGen', From mattst88 at kemper.freedesktop.org Mon Apr 27 21:49:13 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 27 Apr 2015 14:49:13 -0700 (PDT) Subject: Mesa (master): i965/fs: Fix stride for multiply in macro. Message-ID: <20150427214913.6982F761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ecf428aa5945c5e1941b6bf496a381c7a09cdda2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecf428aa5945c5e1941b6bf496a381c7a09cdda2 Author: Matt Turner Date: Fri Apr 24 11:28:04 2015 -0700 i965/fs: Fix stride for multiply in macro. We have to use W/UW type for src1 of the multiply in the MUL/MACH macro, but in order to read the low 16-bits of each 32-bit integer, we need to set the appropriate stride. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cf59570..f37fdea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -916,8 +916,10 @@ fs_visitor::visit(ir_expression *ir) mul->src[1].type == BRW_REGISTER_TYPE_UD); if (mul->src[1].type == BRW_REGISTER_TYPE_D) { mul->src[1].type = BRW_REGISTER_TYPE_W; + mul->src[1].stride = 2; } else { mul->src[1].type = BRW_REGISTER_TYPE_UW; + mul->src[1].stride = 2; } } From mattst88 at kemper.freedesktop.org Mon Apr 27 21:49:13 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 27 Apr 2015 14:49:13 -0700 (PDT) Subject: Mesa (master): Revert "i965/fs: Allow SIMD16 borrow/carry/ 64-bit multiply on Gen > 7." Message-ID: <20150427214913.630F1761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b3e29a20225756c8db299088e0b6ead82139cabb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3e29a20225756c8db299088e0b6ead82139cabb Author: Matt Turner Date: Fri Apr 24 11:28:03 2015 -0700 Revert "i965/fs: Allow SIMD16 borrow/carry/64-bit multiply on Gen > 7." This reverts commit 9f5e5bd34d8ba48c851b442fb88f742b1ba6a571. I have no idea what made me believe these didn't apply to Gen > 7. They do, and without them we generate bad code that causes failures on Gen 8. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 11f38c2..cf59570 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -892,7 +892,7 @@ fs_visitor::visit(ir_expression *ir) } break; case ir_binop_imul_high: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), @@ -929,7 +929,7 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_carry: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), @@ -940,7 +940,7 @@ fs_visitor::visit(ir_expression *ir) break; } case ir_binop_borrow: { - if (devinfo->gen == 7) + if (devinfo->gen >= 7) no16("SIMD16 explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(dispatch_width), From mattst88 at kemper.freedesktop.org Mon Apr 27 21:49:13 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 27 Apr 2015 14:49:13 -0700 (PDT) Subject: Mesa (master): i965/fs: Fix code emission for imul_high in NIR. Message-ID: <20150427214913.730A376250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0c06d019bcf626b289ae94ca791dc25c216c1e5c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0c06d019bcf626b289ae94ca791dc25c216c1e5c Author: Matt Turner Date: Fri Apr 24 11:28:05 2015 -0700 i965/fs: Fix code emission for imul_high in NIR. Copy over from brw_fs_visitor.cpp. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9564764..523e56d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -827,8 +827,30 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type); - emit(MUL(acc, op[0], op[1])); + fs_inst *mul = emit(MUL(acc, op[0], op[1])); emit(MACH(result, op[0], op[1])); + + /* Until Gen8, integer multiplies read 32-bits from one source, and + * 16-bits from the other, and relying on the MACH instruction to + * generate the high bits of the result. + * + * On Gen8, the multiply instruction does a full 32x32-bit multiply, + * but in order to do a 64x64-bit multiply we have to simulate the + * previous behavior and then use a MACH instruction. + * + * FINISHME: Don't use source modifiers on src1. + */ + if (devinfo->gen >= 8) { + assert(mul->src[1].type == BRW_REGISTER_TYPE_D || + mul->src[1].type == BRW_REGISTER_TYPE_UD); + if (mul->src[1].type == BRW_REGISTER_TYPE_D) { + mul->src[1].type = BRW_REGISTER_TYPE_W; + mul->src[1].stride = 2; + } else { + mul->src[1].type = BRW_REGISTER_TYPE_UW; + mul->src[1].stride = 2; + } + } break; } From mattst88 at kemper.freedesktop.org Mon Apr 27 21:49:13 2015 From: mattst88 at kemper.freedesktop.org (Matt Turner) Date: Mon, 27 Apr 2015 14:49:13 -0700 (PDT) Subject: Mesa (master): i965: Enable ARB_gpu_shader5 on Gen8+. Message-ID: <20150427214913.7A5EE76101@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ff6ee39c19adc59225d1f0664695c669cd7e5a94 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ff6ee39c19adc59225d1f0664695c669cd7e5a94 Author: Matt Turner Date: Fri Apr 24 11:28:06 2015 -0700 i965: Enable ARB_gpu_shader5 on Gen8+. 
Reviewed-by: Kenneth Graunke --- docs/relnotes/10.6.0.html | 1 + src/mesa/drivers/dri/i965/intel_extensions.c | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 48f76f9..dbf1229 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -55,6 +55,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_clip_control on i965</li>
 <li>GL_ARB_program_interface_query (all drivers)</li>
 <li>GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe</li>
+<li>GL_ARB_gpu_shader5 on i965/gen8+</li>
 </ul>

 <h2>Bug fixes</h2>

    diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 48064e1..c28c171 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -303,6 +303,8 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; + ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_shader_atomic_counters = true; ctx->Extensions.ARB_texture_view = true; if (can_do_pipelined_register_writes(brw)) { ctx->Extensions.ARB_transform_feedback2 = true; @@ -342,12 +344,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ANGLE_texture_compression_dxt = true; - if (brw->gen >= 7) - ctx->Extensions.ARB_shader_atomic_counters = true; - - if (brw->gen == 7) - ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.OES_texture_float = true; ctx->Extensions.OES_texture_float_linear = true; ctx->Extensions.OES_texture_half_float = true; From tarceri at kemper.freedesktop.org Mon Apr 27 21:49:46 2015 From: tarceri at kemper.freedesktop.org (Timothy Arceri) Date: Mon, 27 Apr 2015 14:49:46 -0700 (PDT) Subject: Mesa (master): glsl: fix packing support for arrays of doubles Message-ID: <20150427214946.BFC06761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d795cc6508cba209c66c658e39d5adbe2022f0fb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d795cc6508cba209c66c658e39d5adbe2022f0fb Author: Timothy Arceri Date: Tue Apr 28 07:26:36 2015 +1000 glsl: fix packing support for arrays of doubles Broke in commit f00c5f85b82efe9535b18dbf97c4591fb28aeae6 when adding support for multidimensional arrays Reviewed-by: Ilia Mirkin --- src/glsl/lower_packed_varyings.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp index f8e79bd..d8bebb5 100644 --- a/src/glsl/lower_packed_varyings.cpp +++ 
b/src/glsl/lower_packed_varyings.cpp @@ -645,8 +645,8 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var) if (var->data.explicit_location) return false; - if (var->type->without_array()->vector_elements == 4 && - !var->type->is_double()) + const glsl_type *type = var->type->without_array(); + if (type->vector_elements == 4 && !type->is_double()) return false; return true; } From kwg at kemper.freedesktop.org Mon Apr 27 21:59:37 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Mon, 27 Apr 2015 14:59:37 -0700 (PDT) Subject: Mesa (master): i965/vs: Remove unnecessary NULL check on generate_code() result. Message-ID: <20150427215937.7AFE5761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dffc1a0ae3a75d426f10c5d3ba021de977467929 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dffc1a0ae3a75d426f10c5d3ba021de977467929 Author: Kenneth Graunke Date: Sat Apr 25 09:47:59 2015 -0700 i965/vs: Remove unnecessary NULL check on generate_code() result. Code generation is not allowed to fail for any reason - in fact, fs_generator has no mechanism for failing. The visitor is responsible for that. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9398adc..8ce07637 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1868,8 +1868,7 @@ brw_vs_emit(struct brw_context *brw, g.generate_code(v.cfg, 8); assembly = g.get_assembly(final_assembly_size); - if (assembly) - prog_data->base.simd8 = true; + prog_data->base.simd8 = true; c->base.last_scratch = v.last_scratch; } From imirkin at kemper.freedesktop.org Tue Apr 28 00:21:19 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 17:21:19 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add support for disabling depth clipping Message-ID: <20150428002119.0A7D1761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f5c110199675b7f4c50d4a7a64b6aac44c55d0a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5c110199675b7f4c50d4a7a64b6aac44c55d0a4 Author: Ilia Mirkin Date: Fri Apr 24 21:44:05 2015 -0400 freedreno/a3xx: add support for disabling depth clipping Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 9 +++++---- src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c | 2 ++ src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index dbf1229..5695ae4 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -45,17 +45,18 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_AMD_pinned_memory on r600, radeonsi</li>
+<li>GL_ARB_clip_control on i965</li>
+<li>GL_ARB_depth_clamp on freedreno</li>
 <li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
 <li>GL_ARB_draw_instanced on freedreno</li>
 <li>GL_ARB_gpu_shader_fp64 on nvc0, softpipe</li>
+<li>GL_ARB_gpu_shader5 on i965/gen8+</li>
 <li>GL_ARB_instanced_arrays on freedreno</li>
 <li>GL_ARB_pipeline_statistics_query on i965, nv50, nvc0, r600, radeonsi, softpipe</li>
-<li>GL_ARB_uniform_buffer_object on freedreno</li>
-<li>GL_EXT_draw_buffers2 on freedreno</li>
-<li>GL_ARB_clip_control on i965</li>
 <li>GL_ARB_program_interface_query (all drivers)</li>
 <li>GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe</li>
-<li>GL_ARB_gpu_shader5 on i965/gen8+</li>
+<li>GL_ARB_uniform_buffer_object on freedreno</li>
+<li>GL_EXT_draw_buffers2 on freedreno</li>
 </ul>

 <h2>Bug fixes</h2>

    diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c index 94f6d6e..9c16804 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -97,6 +97,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx, if (cso->offset_tri) so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + if (!cso->depth_clip) + so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE; return so; } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 15ae287..556c8ab 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -182,6 +182,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_DEPTH_CLIP_DISABLE: return is_a3xx(screen); case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: @@ -193,7 +194,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120; /* Unsupported features. 
*/ - case PIPE_CAP_DEPTH_CLIP_DISABLE: case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: From imirkin at kemper.freedesktop.org Tue Apr 28 00:21:19 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 17:21:19 -0700 (PDT) Subject: Mesa (master): freedreno: add fd_transfer to wrap around pipe_transfer Message-ID: <20150428002119.10D36761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0a4cb00c7765dbe26a4dbfad3bb87d6c6ce03919 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0a4cb00c7765dbe26a4dbfad3bb87d6c6ce03919 Author: Ilia Mirkin Date: Mon Apr 6 01:39:14 2015 -0400 freedreno: add fd_transfer to wrap around pipe_transfer Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/freedreno_context.c | 2 +- src/gallium/drivers/freedreno/freedreno_resource.c | 4 +++- src/gallium/drivers/freedreno/freedreno_resource.h | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 5fca57c..5d92da4 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -222,7 +222,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen, util_dynarray_init(&ctx->draw_patches); - util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer), + util_slab_create(&ctx->transfer_pool, sizeof(struct fd_transfer), 16, UTIL_SLAB_SINGLETHREADED); fd_draw_init(pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index 985b663..e8da68e 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -140,6 +140,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, struct fd_context *ctx = fd_context(pctx); struct 
fd_resource *rsc = fd_resource(prsc); struct fd_resource_slice *slice = fd_resource_slice(rsc, level); + struct fd_transfer *trans; struct pipe_transfer *ptrans; enum pipe_format format = prsc->format; uint32_t op = 0; @@ -154,7 +155,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; /* util_slab_alloc() doesn't zero: */ - memset(ptrans, 0, sizeof(*ptrans)); + trans = fd_transfer(ptrans); + memset(trans, 0, sizeof(*trans)); pipe_resource_reference(&ptrans->resource, prsc); ptrans->level = level; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index a2a540c..f80acb1 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -81,6 +81,17 @@ fd_resource(struct pipe_resource *ptex) return (struct fd_resource *)ptex; } +struct fd_transfer { + struct pipe_transfer base; + void *staging; +}; + +static INLINE struct fd_transfer * +fd_transfer(struct pipe_transfer *ptrans) +{ + return (struct fd_transfer *)ptrans; +} + static INLINE struct fd_resource_slice * fd_resource_slice(struct fd_resource *rsc, unsigned level) { From imirkin at kemper.freedesktop.org Tue Apr 28 00:21:19 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 17:21:19 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add support for S8 and Z32F_S8 Message-ID: <20150428002119.2DC73761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9fc3f472784b2ba53655b715d602268bef5bf12e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9fc3f472784b2ba53655b715d602268bef5bf12e Author: Ilia Mirkin Date: Wed Apr 22 14:35:00 2015 -0400 freedreno/a3xx: add support for S8 and Z32F_S8 Enables ARB_depth_buffer_float. There is no sampling support for interleaved Z32F_S8, so we store the two textures separately, one as Z32F, the other as S8. As a result, we need a lot of additional logic for restores and transfers. 
Signed-off-by: Ilia Mirkin --- docs/relnotes/10.6.0.html | 1 + src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 13 ++- src/gallium/drivers/freedreno/a3xx/fd3_format.c | 7 ++ src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 80 +++++++++---- src/gallium/drivers/freedreno/freedreno_context.c | 8 +- src/gallium/drivers/freedreno/freedreno_draw.c | 13 ++- src/gallium/drivers/freedreno/freedreno_gmem.c | 29 +++-- src/gallium/drivers/freedreno/freedreno_gmem.h | 4 +- src/gallium/drivers/freedreno/freedreno_resource.c | 122 ++++++++++++++++++-- src/gallium/drivers/freedreno/freedreno_resource.h | 3 + 10 files changed, 236 insertions(+), 44 deletions(-) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index 5695ae4..ff24842 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_AMD_pinned_memory on r600, radeonsi</li>
 <li>GL_ARB_clip_control on i965</li>
+<li>GL_ARB_depth_buffer_float on freedreno</li>
 <li>GL_ARB_depth_clamp on freedreno</li>
 <li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
 <li>GL_ARB_draw_instanced on freedreno</li>
    • diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index ee473e6..af08696 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -383,9 +383,17 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, } struct fd_resource *rsc = fd_resource(psurf[i]->texture); + enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); + /* The restore blit_zs shader expects stencil in sampler 0, and depth + * in sampler 1 + */ + if (rsc->stencil && i == 0) { + rsc = rsc->stencil; + format = fd3_gmem_restore_format(rsc->base.b.format); + } + unsigned lvl = psurf[i]->u.tex.level; struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl); - enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format); debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer); @@ -412,6 +420,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, for (i = 0; i < bufs; i++) { if (psurf[i]) { struct fd_resource *rsc = fd_resource(psurf[i]->texture); + /* Matches above logic for blit_zs shader */ + if (rsc->stencil && i == 0) + rsc = rsc->stencil; unsigned lvl = psurf[i]->u.tex.level; uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer); OUT_RELOC(ring, rsc->bo, offset, 0, 0); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 76cb318..ec87aa9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -91,6 +91,8 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(I8_UINT, 8_UINT, NONE, WZYX), _T(I8_SINT, 8_SINT, NONE, WZYX), + _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), + /* 16-bit */ VT(R16_UNORM, 16_UNORM, NONE, WZYX), VT(R16_SNORM, 16_SNORM, NONE, WZYX), @@ -196,6 +198,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), 
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT_S8X24_UINT, Z32_FLOAT,R8G8B8A8_UNORM, WZYX), /* 48-bit */ V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), @@ -296,6 +299,8 @@ fd3_pipe2swap(enum pipe_format format) enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format) { + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; switch (util_format_get_blocksizebits(format)) { case 8: return TFETCH_1_BYTE; case 16: return TFETCH_2_BYTE; @@ -324,6 +329,8 @@ fd3_gmem_restore_format(enum pipe_format format) return PIPE_FORMAT_R8G8B8A8_UNORM; case PIPE_FORMAT_Z16_UNORM: return PIPE_FORMAT_R8G8_UNORM; + case PIPE_FORMAT_S8_UINT: + return PIPE_FORMAT_R8_UNORM; default: return format; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index d76acb2..7d39757 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -72,12 +72,20 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface *psurf = bufs[i]; rsc = fd_resource(psurf->texture); - slice = fd_resource_slice(rsc, psurf->u.tex.level); - format = fd3_pipe2color(psurf->format); - swap = fd3_pipe2swap(psurf->format); pformat = psurf->format; + /* In case we're drawing to Z32F_S8, the "color" actually goes to + * the stencil + */ + if (rsc->stencil) { + rsc = rsc->stencil; + pformat = rsc->base.b.format; + bases++; + } + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd3_pipe2color(pformat); + swap = fd3_pipe2swap(pformat); if (decode_srgb) - srgb = util_format_is_srgb(psurf->format); + srgb = util_format_is_srgb(pformat); else pformat = util_format_linear(pformat); @@ -299,12 +307,17 @@ emit_binning_workaround(struct fd_context *ctx) static void emit_gmem2mem_surf(struct fd_context *ctx, - enum adreno_rb_copy_control_mode mode, - uint32_t base, struct pipe_surface 
*psurf) + enum adreno_rb_copy_control_mode mode, + bool stencil, + uint32_t base, struct pipe_surface *psurf) { struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); enum pipe_format format = psurf->format; + if (stencil) { + rsc = rsc->stencil; + format = rsc->base.b.format; + } struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); @@ -322,10 +335,10 @@ emit_gmem2mem_surf(struct fd_context *ctx, OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | - A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | + A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) | A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | - A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); + A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format))); fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL); @@ -421,9 +434,15 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) fd3_program_emit(ring, &emit, 0, NULL); fd3_emit_vertex_bufs(ring, &emit); - if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) - emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, - ctx->gmem.zsbuf_base, pfb->zsbuf); + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (!rsc->stencil || ctx->resolve & FD_BUFFER_DEPTH) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, false, + ctx->gmem.zsbuf_base[0], pfb->zsbuf); + if (rsc->stencil && ctx->resolve & FD_BUFFER_STENCIL) + emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, true, + ctx->gmem.zsbuf_base[1], pfb->zsbuf); + } if (ctx->resolve & FD_BUFFER_COLOR) { for (i = 0; i < pfb->nr_cbufs; 
i++) { @@ -431,7 +450,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) continue; if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i))) continue; - emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, + emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, false, ctx->gmem.cbuf_base[i], pfb->cbufs[i]); } } @@ -454,6 +473,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w) { struct fd_ringbuffer *ring = ctx->ring; + struct pipe_surface *zsbufs[2]; assert(bufs > 0); @@ -464,7 +484,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], emit_mrt(ring, bufs, psurf, bases, bin_w, false); - if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT || + psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) { /* Depth is stored as unorm in gmem, so we have to write it in using a * special blit shader which writes depth. */ @@ -480,8 +501,18 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w)); - OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); - OUT_RING(ring, 0); + if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); + OUT_RING(ring, 0); + } else { + /* The gmem_restore_tex logic will put the first buffer's stencil + * as color. Supply it with the proper information to make that + * happen. 
+ */ + zsbufs[0] = zsbufs[1] = psurf[0]; + psurf = zsbufs; + bufs = 2; + } } else { OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); @@ -509,7 +540,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) .half_precision = (fd3_half_precision(pfb->cbufs[0]) && fd3_half_precision(pfb->cbufs[1]) && fd3_half_precision(pfb->cbufs[2]) && - fd3_half_precision(pfb->cbufs[3])), + fd3_half_precision(pfb->cbufs[3])) }, }; float x0, y0, x1, y1; @@ -592,6 +623,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, 0); /* RB_STENCIL_INFO */ + OUT_RING(ring, 0); /* RB_STENCIL_PITCH */ + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | @@ -640,7 +675,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) emit.key.half_precision = false; } fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); - emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); + emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); @@ -950,14 +985,19 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) uint32_t reg; OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); - reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]); if (pfb->zsbuf) { reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); } OUT_RING(ring, reg); if (pfb->zsbuf) { - uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); - OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w)); + if 
(rsc->stencil) { + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2); + OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1])); + OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w)); + } } else { OUT_RING(ring, 0x00000000); } diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 5d92da4..668ef36 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -121,8 +121,12 @@ fd_context_render(struct pipe_context *pctx) for (i = 0; i < pfb->nr_cbufs; i++) if (pfb->cbufs[i]) fd_resource(pfb->cbufs[i]->texture)->dirty = false; - if (pfb->zsbuf) - fd_resource(pfb->zsbuf->texture)->dirty = false; + if (pfb->zsbuf) { + rsc = fd_resource(pfb->zsbuf->texture); + rsc->dirty = false; + if (rsc->stencil) + rsc->stencil->dirty = false; + } /* go through all the used resources and clear their reading flag */ LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) { diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index fed3e64..c9e317c 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -88,8 +88,12 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } if (fd_stencil_enabled(ctx)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); buffers |= FD_BUFFER_STENCIL; - fd_resource(pfb->zsbuf->texture)->dirty = true; + if (rsc->stencil) + rsc->stencil->dirty = true; + else + rsc->dirty = true; ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED; } @@ -215,7 +219,12 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, fd_resource(pfb->cbufs[i]->texture)->dirty = true; if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - fd_resource(pfb->zsbuf->texture)->dirty = true; + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + if (rsc->stencil && buffers & 
PIPE_CLEAR_STENCIL) + rsc->stencil->dirty = true; + if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH) + rsc->dirty = true; + ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL; } diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index 473d2b8..11a1b62 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -77,7 +77,7 @@ static uint32_t bin_width(struct fd_context *ctx) } static uint32_t -total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp, +total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) { uint32_t total = 0, i; @@ -89,9 +89,14 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp, } } - if (zsbuf_cpp) { - gmem->zsbuf_base = align(total, 0x4000); - total = gmem->zsbuf_base + zsbuf_cpp * bin_w * bin_h; + if (zsbuf_cpp[0]) { + gmem->zsbuf_base[0] = align(total, 0x4000); + total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; + } + + if (zsbuf_cpp[1]) { + gmem->zsbuf_base[1] = align(total, 0x4000); + total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; } return total; @@ -108,13 +113,17 @@ calculate_tiles(struct fd_context *ctx) uint32_t nbins_x = 1, nbins_y = 1; uint32_t bin_w, bin_h; uint32_t max_width = bin_width(ctx); - uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp = 0; + uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0}; uint32_t i, j, t, xoff, yoff; uint32_t tpp_x, tpp_y; bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)); - if (has_zs) - zsbuf_cpp = util_format_get_blocksize(pfb->zsbuf->format); + if (has_zs) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + zsbuf_cpp[0] = rsc->cpp; + if (rsc->stencil) + zsbuf_cpp[1] = rsc->stencil->cpp; + } for (i = 0; i < pfb->nr_cbufs; i++) { if (pfb->cbufs[i]) cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format); @@ -122,7 +131,7 @@ calculate_tiles(struct fd_context *ctx) cbuf_cpp[i] = 4; } - if 
(gmem->zsbuf_cpp == zsbuf_cpp && + if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) && !memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) && !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { /* everything is up-to-date */ @@ -156,7 +165,7 @@ calculate_tiles(struct fd_context *ctx) * constraints: */ DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d", - cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp, + cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0], width, height); while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) { if (bin_w > bin_h) { @@ -172,7 +181,7 @@ calculate_tiles(struct fd_context *ctx) gmem->scissor = *scissor; memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)); - gmem->zsbuf_cpp = zsbuf_cpp; + memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)); gmem->bin_h = bin_h; gmem->bin_w = bin_w; gmem->nbins_x = nbins_x; diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h index 81f9b6a..5867235 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.h +++ b/src/gallium/drivers/freedreno/freedreno_gmem.h @@ -48,9 +48,9 @@ struct fd_tile { struct fd_gmem_stateobj { struct pipe_scissor_state scissor; uint32_t cbuf_base[4]; - uint32_t zsbuf_base; + uint32_t zsbuf_base[2]; uint8_t cbuf_cpp[4]; - uint8_t zsbuf_cpp; + uint8_t zsbuf_cpp[2]; uint16_t bin_h, nbins_y; uint16_t bin_w, nbins_x; uint16_t minx, miny; diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c index e8da68e..95f79df 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.c +++ b/src/gallium/drivers/freedreno/freedreno_resource.c @@ -27,6 +27,7 @@ */ #include "util/u_format.h" +#include "util/u_format_zs.h" #include "util/u_inlines.h" #include "util/u_transfer.h" #include "util/u_string.h" @@ -101,16 +102,51 @@ realloc_bo(struct fd_resource *rsc, uint32_t size) 
util_range_set_empty(&rsc->valid_buffer_range); } +/* Currently this is only used for flushing Z32_S8 texture transfers, but + * eventually it should handle everything. + */ +static void +fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box) +{ + struct fd_resource *rsc = fd_resource(trans->base.resource); + struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level); + struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level); + enum pipe_format format = trans->base.resource->format; + + float *depth = fd_bo_map(rsc->bo) + slice->offset + + (trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4; + uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset + + (trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x; + + assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + format == PIPE_FORMAT_X32_S8X24_UINT); + + if (format != PIPE_FORMAT_X32_S8X24_UINT) + util_format_z32_float_s8x24_uint_unpack_z_float( + depth, slice->pitch * 4, + trans->staging, trans->base.stride, + box->width, box->height); + + util_format_z32_float_s8x24_uint_unpack_s_8uint( + stencil, sslice->pitch, + trans->staging, trans->base.stride, + box->width, box->height); +} + static void fd_resource_transfer_flush_region(struct pipe_context *pctx, struct pipe_transfer *ptrans, const struct pipe_box *box) { struct fd_resource *rsc = fd_resource(ptrans->resource); + struct fd_transfer *trans = fd_transfer(ptrans); if (ptrans->resource->target == PIPE_BUFFER) util_range_add(&rsc->valid_buffer_range, ptrans->box.x + box->x, ptrans->box.x + box->x + box->width); + + if (trans->staging) + fd_resource_flush(trans, box); } static void @@ -119,8 +155,19 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, { struct fd_context *ctx = fd_context(pctx); struct fd_resource *rsc = fd_resource(ptrans->resource); - if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) + struct fd_transfer *trans = 
fd_transfer(ptrans); + + if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + struct pipe_box box; + u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box); + fd_resource_flush(trans, &box); + } + + if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { fd_bo_cpu_fini(rsc->bo); + if (rsc->stencil) + fd_bo_cpu_fini(rsc->stencil->bo); + } util_range_add(&rsc->valid_buffer_range, ptrans->box.x, @@ -128,6 +175,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx, pipe_resource_reference(&ptrans->resource, NULL); util_slab_free(&ctx->transfer_pool, ptrans); + + if (trans->staging) + free(trans->staging); } static void * @@ -148,7 +198,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, char *buf; int ret = 0; - DBG("prsc=%p, level=%u, usage=%x", prsc, level, usage); + DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage, + box->width, box->height, box->x, box->y); ptrans = util_slab_alloc(&ctx->transfer_pool); if (!ptrans) @@ -173,6 +224,8 @@ fd_resource_transfer_map(struct pipe_context *pctx, if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { realloc_bo(rsc, fd_bo_size(rsc->bo)); + if (rsc->stencil) + realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo)); fd_invalidate_resource(ctx, prsc); } else if ((usage & PIPE_TRANSFER_WRITE) && prsc->target == PIPE_BUFFER && @@ -185,7 +238,7 @@ fd_resource_transfer_map(struct pipe_context *pctx, /* If the GPU is writing to the resource, or if it is reading from the * resource and we're trying to write to it, flush the renders. 
*/ - if (rsc->dirty || + if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) || ((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading)) fd_context_render(pctx); @@ -204,8 +257,6 @@ fd_resource_transfer_map(struct pipe_context *pctx, return NULL; } - *pptrans = ptrans; - if (rsc->layer_first) { offset = slice->offset + box->y / util_format_get_blockheight(format) * ptrans->stride + @@ -218,6 +269,47 @@ fd_resource_transfer_map(struct pipe_context *pctx, box->z * slice->size0; } + if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + prsc->format == PIPE_FORMAT_X32_S8X24_UINT) { + trans->base.stride = trans->base.box.width * rsc->cpp * 2; + trans->staging = malloc(trans->base.stride * trans->base.box.height); + if (!trans->staging) + goto fail; + + /* if we're not discarding the whole range (or resource), we must copy + * the real data in. + */ + if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_DISCARD_RANGE))) { + struct fd_resource_slice *sslice = + fd_resource_slice(rsc->stencil, level); + void *sbuf = fd_bo_map(rsc->stencil->bo); + if (!sbuf) + goto fail; + + float *depth = (float *)(buf + slice->offset + + box->y * slice->pitch * 4 + box->x * 4); + uint8_t *stencil = sbuf + sslice->offset + + box->y * sslice->pitch + box->x; + + if (format != PIPE_FORMAT_X32_S8X24_UINT) + util_format_z32_float_s8x24_uint_pack_z_float( + trans->staging, trans->base.stride, + depth, slice->pitch * 4, + box->width, box->height); + + util_format_z32_float_s8x24_uint_pack_s_8uint( + trans->staging, trans->base.stride, + stencil, sslice->pitch, + box->width, box->height); + } + + buf = trans->staging; + offset = 0; + } + + *pptrans = ptrans; + return buf + offset; fail: @@ -347,7 +439,10 @@ fd_resource_create(struct pipe_screen *pscreen, util_range_init(&rsc->valid_buffer_range); rsc->base.vtbl = &fd_resource_vtbl; - rsc->cpp = util_format_get_blocksize(tmpl->format); + if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + rsc->cpp = 
util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT); + else + rsc->cpp = util_format_get_blocksize(tmpl->format); assert(rsc->cpp); @@ -374,6 +469,19 @@ fd_resource_create(struct pipe_screen *pscreen, if (!rsc->bo) goto fail; + /* There is no native Z32F_S8 sampling or rendering format, so this must + * be emulated via two separate textures. The depth texture still keeps + * its Z32F_S8 format though, and we also keep a reference to a separate + * S8 texture. + */ + if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + struct pipe_resource stencil = *tmpl; + stencil.format = PIPE_FORMAT_S8_UINT; + rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil)); + if (!rsc->stencil) + goto fail; + } + return prsc; fail: fd_resource_destroy(pscreen, prsc); @@ -567,7 +675,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) { struct fd_resource *rsc = fd_resource(prsc); - if (rsc->dirty) + if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty)) fd_context_render(pctx); } diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h index f80acb1..fdf3b8c 100644 --- a/src/gallium/drivers/freedreno/freedreno_resource.h +++ b/src/gallium/drivers/freedreno/freedreno_resource.h @@ -72,6 +72,9 @@ struct fd_resource { /* buffer range that has been initialized */ struct util_range valid_buffer_range; + /* reference to the resource holding stencil data for a z32_s8 texture */ + struct fd_resource *stencil; + struct list_head list; }; From imirkin at kemper.freedesktop.org Tue Apr 28 00:21:19 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 17:21:19 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: add Z32F support Message-ID: <20150428002119.25035761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1571da6ac31ade482f5e4adc82eb66d42a1bb389 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1571da6ac31ade482f5e4adc82eb66d42a1bb389 Author: Ilia 
Mirkin Date: Sat Apr 25 01:21:26 2015 -0400 freedreno/a3xx: add Z32F support 32-bit depth buffers are stored as unorm, and thus need special handling when moving to and from gmem. They are copied into gmem by writing depth, and resolved from gmem using a special resolve bit which apparently float-ifies the data. Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_format.c | 2 +- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 50 ++++++++++++++++++--- src/gallium/drivers/freedreno/freedreno_context.h | 1 + src/gallium/drivers/freedreno/freedreno_program.c | 19 ++++++-- src/gallium/drivers/freedreno/freedreno_util.c | 3 ++ 5 files changed, 65 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c index 939693d..76cb318 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c @@ -195,7 +195,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = { _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), - /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/ + _T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX), /* 48-bit */ V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 4e2eefa..d76acb2 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -304,6 +304,7 @@ emit_gmem2mem_surf(struct fd_context *ctx, { struct fd_ringbuffer *ring = ctx->ring; struct fd_resource *rsc = fd_resource(psurf->texture); + enum pipe_format format = psurf->format; struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level); uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); @@ -313,7 +314,10 @@ emit_gmem2mem_surf(struct fd_context *ctx, OUT_PKT0(ring, 
REG_A3XX_RB_COPY_CONTROL, 4); OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | A3XX_RB_COPY_CONTROL_MODE(mode) | - A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); + A3XX_RB_COPY_CONTROL_GMEM_BASE(base) | + COND(format == PIPE_FORMAT_Z32_FLOAT || + format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, + A3XX_RB_COPY_CONTROL_UNK12)); OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */ OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); @@ -453,15 +457,35 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[], assert(bufs > 0); - emit_mrt(ring, bufs, psurf, bases, bin_w, false); - OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE | A3XX_RB_MODE_CONTROL_MRT(bufs - 1)); - OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); - OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); + emit_mrt(ring, bufs, psurf, bases, bin_w, false); + + if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) { + /* Depth is stored as unorm in gmem, so we have to write it in using a + * special blit shader which writes depth. 
+ */ + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z | + A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A3XX_RB_DEPTH_CONTROL_Z_ENABLE | + A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE | + A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS))); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) | + A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32)); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w)); + + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1); + OUT_RING(ring, 0); + } else { + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1)); + } fd3_emit_gmem_restore_tex(ring, psurf, bufs); @@ -600,7 +624,21 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) } if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { - emit.prog = &ctx->blit_prog[0]; + if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) { + /* Non-float can use a regular color write. It's split over 8-bit + * components, so half precision is always sufficient. + */ + emit.prog = &ctx->blit_prog[0]; + emit.key.half_precision = true; + } else { + /* Float depth needs special blit shader that writes depth */ + if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) + emit.prog = &ctx->blit_z; + else + emit.prog = &ctx->blit_zs; + emit.key.half_precision = false; + } fd3_program_emit(ring, &emit, 1, &pfb->zsbuf); emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w); } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index a648689..e6a5f01 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -180,6 +180,7 @@ struct fd_context { /* shaders used by mem->gmem blits: */ struct fd_program_stateobj blit_prog[8]; // TODO move to screen? 
+ struct fd_program_stateobj blit_z, blit_zs; /* do we need to mem2gmem before rendering. We don't, if for example, * there was a glClear() that invalidated the entire previous buffer diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c index 52a165b..5e344e6 100644 --- a/src/gallium/drivers/freedreno/freedreno_program.c +++ b/src/gallium/drivers/freedreno/freedreno_program.c @@ -92,7 +92,7 @@ static void * assemble_tgsi(struct pipe_context *pctx, } static void * -fd_prog_blit(struct pipe_context *pctx, int rts) +fd_prog_blit(struct pipe_context *pctx, int rts, bool depth) { int i; struct ureg_src tc; @@ -105,6 +105,12 @@ fd_prog_blit(struct pipe_context *pctx, int rts) for (i = 0; i < rts; i++) ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i), TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i)); + if (depth) + ureg_TEX(ureg, + ureg_writemask( + ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), + TGSI_WRITEMASK_Z), + TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts)); ureg_END(ureg); @@ -128,11 +134,16 @@ void fd_prog_init(struct pipe_context *pctx) ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true); ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false); ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false); - ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1); + ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false); for (i = 1; i < ctx->screen->max_rts; i++) { ctx->blit_prog[i].vp = ctx->blit_prog[0].vp; - ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1); + ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false); } + + ctx->blit_z.vp = ctx->blit_prog[0].vp; + ctx->blit_z.fp = fd_prog_blit(pctx, 0, true); + ctx->blit_zs.vp = ctx->blit_prog[0].vp; + ctx->blit_zs.fp = fd_prog_blit(pctx, 1, true); } void fd_prog_fini(struct pipe_context *pctx) @@ -145,4 +156,6 @@ void fd_prog_fini(struct pipe_context *pctx) pctx->delete_vs_state(pctx, ctx->blit_prog[0].vp); for (i = 0; i < 
ctx->screen->max_rts; i++) pctx->delete_fs_state(pctx, ctx->blit_prog[i].fp); + pctx->delete_fs_state(pctx, ctx->blit_z.fp); + pctx->delete_fs_state(pctx, ctx->blit_zs.fp); } diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c index 9892b05..2acce06 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.c +++ b/src/gallium/drivers/freedreno/freedreno_util.c @@ -44,6 +44,9 @@ fd_pipe2depth(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_UINT_Z24_UNORM: return DEPTHX_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTHX_32; default: return ~0; } From imirkin at kemper.freedesktop.org Tue Apr 28 00:21:19 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 17:21:19 -0700 (PDT) Subject: Mesa (master): freedreno/a3xx: color masking works like a blend for some formats Message-ID: <20150428002119.36541761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 52614f59b7a9c4ae5efeacba26fa811568818811 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52614f59b7a9c4ae5efeacba26fa811568818811 Author: Ilia Mirkin Date: Sat Apr 25 15:37:24 2015 -0400 freedreno/a3xx: color masking works like a blend for some formats When there is a colormask active that does not cover all the channels, enable reading in the destination like with a combining blend operation. This fixes fbo-blending-formats on a3xx. 
Signed-off-by: Ilia Mirkin --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index af08696..07cc226 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -704,6 +704,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) { enum pipe_format format = pipe_surface_format(ctx->framebuffer.cbufs[i]); + const struct util_format_description *desc = + util_format_description(format); bool is_float = util_format_is_float(format); bool is_int = util_format_is_pure_integer(format); bool has_alpha = util_format_has_alpha(format); @@ -726,6 +728,18 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, control &= ~A3XX_RB_MRT_CONTROL_BLEND2; } + if (format && util_format_get_component_bits( + format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) { + const struct pipe_rt_blend_state *rt; + if (ctx->blend->independent_blend_enable) + rt = &ctx->blend->rt[i]; + else + rt = &ctx->blend->rt[0]; + + if (!util_format_colormask_full(desc, rt->colormask)) + control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + } + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); OUT_RING(ring, control); From imirkin at kemper.freedesktop.org Tue Apr 28 01:34:36 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 18:34:36 -0700 (PDT) Subject: Mesa (master): gk110/ir: add support for writing per-patch and shader outputs Message-ID: <20150428013436.6FA03761E7@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 89e0b08794a56e2ef78e8573a8c11e0cc4589f9e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=89e0b08794a56e2ef78e8573a8c11e0cc4589f9e Author: Ilia Mirkin Date: Mon Apr 27 12:54:43 2015 -0400 gk110/ir: add support for writing per-patch and shader outputs Signed-off-by: Ilia Mirkin 
--- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 34cb06a..a73bee2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1347,12 +1347,10 @@ CodeEmitterGK110::emitVFETCH(const Instruction *i) code[1] = 0x7ec00000 | (offset >> 9); code[1] |= (size / 4 - 1) << 18; -#if 0 if (i->perPatch) - code[0] |= 0x100; + code[1] |= 0x4; if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) - code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads -#endif + code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads emitPredicate(i); @@ -1371,10 +1369,8 @@ CodeEmitterGK110::emitEXPORT(const Instruction *i) code[1] = 0x7f000000 | (offset >> 9); code[1] |= (size / 4 - 1) << 18; -#if 0 if (i->perPatch) - code[0] |= 0x100; -#endif + code[1] |= 0x4; emitPredicate(i); From imirkin at kemper.freedesktop.org Tue Apr 28 01:34:36 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Mon, 27 Apr 2015 18:34:36 -0700 (PDT) Subject: Mesa (master): gm107/ir: add lane/vertex count sysvals Message-ID: <20150428013436.75FFC761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9143940da2c4f0deb07d01c1b48d16bb16022997 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9143940da2c4f0deb07d01c1b48d16bb16022997 Author: Ilia Mirkin Date: Tue Jul 22 23:45:13 2014 -0400 gm107/ir: add lane/vertex count sysvals Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index ee0487f..22db368 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -240,6 +240,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) int id = val ? val->reg.data.id : -1; switch (id) { + case SV_LANEID : id = 0x00; break; + case SV_VERTEX_COUNT : id = 0x10; break; case SV_INVOCATION_ID : id = 0x11; break; case SV_INVOCATION_INFO: id = 0x1d; break; default: From mareko at kemper.freedesktop.org Tue Apr 28 14:06:02 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 07:06:02 -0700 (PDT) Subject: Mesa (master): r600g,radeonsi: add a driver query returning GPU load Message-ID: <20150428140602.9D26E761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6d05396b0047c74d740c53156eda1a8574403498 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d05396b0047c74d740c53156eda1a8574403498 Author: Marek Olšák Date: Tue Feb 24 01:26:13 2015 +0100 r600g,radeonsi: add a driver query returning GPU load Reviewed-by: Alex Deucher --- src/gallium/drivers/radeon/Makefile.am | 4 +- src/gallium/drivers/radeon/Makefile.sources | 1 + src/gallium/drivers/radeon/r600_gpu_load.c | 141 +++++++++++++++++++++ src/gallium/drivers/radeon/r600_pipe_common.c | 7 +- src/gallium/drivers/radeon/r600_pipe_common.h | 13 ++ src/gallium/drivers/radeon/r600_query.c | 11 ++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 +++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 3 + 8 files changed, 194 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeon/Makefile.am b/src/gallium/drivers/radeon/Makefile.am index ef6b999..13d8976 100644 --- a/src/gallium/drivers/radeon/Makefile.am +++ b/src/gallium/drivers/radeon/Makefile.am @@ -4,7 +4,9 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ - $(RADEON_CFLAGS) + $(RADEON_CFLAGS) \ + -Wstrict-overflow=0 +# ^^ disable warnings about overflows (os_time_timeout) noinst_LTLIBRARIES = libradeon.la diff --git a/src/gallium/drivers/radeon/Makefile.sources
b/src/gallium/drivers/radeon/Makefile.sources index f2b70a1..469f6d1 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -3,6 +3,7 @@ C_SOURCES := \ r600_buffer_common.c \ r600_cs.h \ r600d_common.h \ + r600_gpu_load.c \ r600_pipe_common.c \ r600_pipe_common.h \ r600_query.c \ diff --git a/src/gallium/drivers/radeon/r600_gpu_load.c b/src/gallium/drivers/radeon/r600_gpu_load.c new file mode 100644 index 0000000..a653834 --- /dev/null +++ b/src/gallium/drivers/radeon/r600_gpu_load.c @@ -0,0 +1,141 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: Marek Olšák + * + */ + +/* The GPU load is measured as follows.
+ * + * There is a thread which samples the GRBM_STATUS register at a certain + * frequency and the "busy" or "idle" counter is incremented based on + * whether the GUI_ACTIVE bit is set or not. + * + * Then, the user can sample the counters twice and calculate the average + * GPU load between the two samples. + */ + +#include "r600_pipe_common.h" +#include "os/os_time.h" + +/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher + * fps (there are too few samples per frame). */ +#define SAMPLES_PER_SEC 10000 + +#define GRBM_STATUS 0x8010 +#define GUI_ACTIVE(x) (((x) >> 31) & 0x1) + +static bool r600_is_gpu_busy(struct r600_common_screen *rscreen) +{ + uint32_t value = 0; + + rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value); + return GUI_ACTIVE(value); +} + +static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)param; + const int period_us = 1000000 / SAMPLES_PER_SEC; + int sleep_us = period_us; + int64_t cur_time, last_time = os_time_get(); + + while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) { + if (sleep_us) + os_time_sleep(sleep_us); + + /* Make sure we sleep the ideal amount of time to match + * the expected frequency. */ + cur_time = os_time_get(); + + if (os_time_timeout(last_time, last_time + period_us, + cur_time)) + sleep_us = MAX2(sleep_us - 1, 1); + else + sleep_us += 1; + + /*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/ + last_time = cur_time; + + /* Update the counters. 
*/ + if (r600_is_gpu_busy(rscreen)) + p_atomic_inc(&rscreen->gpu_load_counter_busy); + else + p_atomic_inc(&rscreen->gpu_load_counter_idle); + } + p_atomic_dec(&rscreen->gpu_load_stop_thread); + return 0; +} + +void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen) +{ + if (!rscreen->gpu_load_thread) + return; + + p_atomic_inc(&rscreen->gpu_load_stop_thread); + pipe_thread_wait(rscreen->gpu_load_thread); + rscreen->gpu_load_thread = 0; +} + +static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen) +{ + /* Start the thread if needed. */ + if (!rscreen->gpu_load_thread) { + pipe_mutex_lock(rscreen->gpu_load_mutex); + /* Check again inside the mutex. */ + if (!rscreen->gpu_load_thread) + rscreen->gpu_load_thread = + pipe_thread_create(r600_gpu_load_thread, rscreen); + pipe_mutex_unlock(rscreen->gpu_load_mutex); + } + + /* The busy counter is in the lower 32 bits. + * The idle counter is in the upper 32 bits. */ + return p_atomic_read(&rscreen->gpu_load_counter_busy) | + ((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32); +} + +/** + * Just return the counters. + */ +uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen) +{ + return r600_gpu_load_read_counter(rscreen); +} + +unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin) +{ + uint64_t end = r600_gpu_load_read_counter(rscreen); + unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff); + unsigned idle = (end >> 32) - (begin >> 32); + + /* Calculate the GPU load. + * + * If no counters have been incremented, return the current load. + * It's for the case when the load is queried faster than + * the counters are updated. + */ + if (idle || busy) + return busy*100 / (busy + idle); + else + return r600_is_gpu_busy(rscreen) ? 
100 : 0; +} diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 97eed13..eddb37d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -667,7 +667,8 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, {"GTT-usage", R600_QUERY_GTT_USAGE, rscreen->info.gart_size, TRUE}, {"temperature", R600_QUERY_GPU_TEMPERATURE, 100, FALSE}, {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, 0, FALSE}, - {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, 0, FALSE} + {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, 0, FALSE}, + {"GPU-load", R600_QUERY_GPU_LOAD, 100, FALSE} }; unsigned num_queries; @@ -872,6 +873,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, } util_format_s3tc_init(); pipe_mutex_init(rscreen->aux_context_lock); + pipe_mutex_init(rscreen->gpu_load_mutex); if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) { rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b, @@ -915,6 +917,9 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, void r600_destroy_common_screen(struct r600_common_screen *rscreen) { + r600_gpu_load_kill_thread(rscreen); + + pipe_mutex_destroy(rscreen->gpu_load_mutex); pipe_mutex_destroy(rscreen->aux_context_lock); rscreen->aux_context->destroy(rscreen->aux_context); diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index c23072c..faa6e0d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -58,6 +58,7 @@ #define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) #define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) #define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11) #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) #define 
R600_CONTEXT_PRIVATE_FLAG (1u << 1) @@ -283,6 +284,13 @@ struct r600_common_screen { struct r600_resource *trace_bo; uint32_t *trace_ptr; unsigned cs_count; + + /* GPU load thread. */ + pipe_mutex gpu_load_mutex; + pipe_thread gpu_load_thread; + unsigned gpu_load_counter_busy; + unsigned gpu_load_counter_idle; + unsigned gpu_load_stop_thread; /* bool */ }; /* This encapsulates a state or an operation which can emitted into the GPU @@ -478,6 +486,11 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const char *r600_get_llvm_processor_name(enum radeon_family family); void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw); +/* r600_gpu_load.c */ +void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); +uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen); +unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin); + /* r600_query.c */ void r600_query_init(struct r600_common_context *rctx); void r600_suspend_nontimer_queries(struct r600_common_context *ctx); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 758064a..1335087 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -89,6 +89,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_GPU_LOAD: return NULL; } @@ -388,6 +389,7 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: + case R600_QUERY_GPU_LOAD: skip_allocation = true; break; default: @@ -459,6 +461,9 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) case R600_QUERY_NUM_BYTES_MOVED: rquery->begin_result = rctx->ws->query_value(rctx->ws, 
RADEON_NUM_BYTES_MOVED); return; + case R600_QUERY_GPU_LOAD: + rquery->begin_result = r600_gpu_load_begin(rctx->screen); + return; } /* Discard the old query buffers. */ @@ -531,6 +536,9 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) case R600_QUERY_CURRENT_GPU_MCLK: rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000; return; + case R600_QUERY_GPU_LOAD: + rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result); + return; } r600_emit_query_end(rctx, rquery); @@ -593,6 +601,9 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, case R600_QUERY_CURRENT_GPU_MCLK: result->u64 = query->end_result - query->begin_result; return TRUE; + case R600_QUERY_GPU_LOAD: + result->u64 = query->end_result; + return TRUE; } map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index b3e3cb2..a6f847f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -582,6 +582,21 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws, return 0; } +static void radeon_read_registers(struct radeon_winsys *rws, + unsigned reg_offset, + unsigned num_registers, uint32_t *out) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + unsigned i; + + for (i = 0; i < num_registers; i++) { + uint32_t reg = reg_offset + i*4; + + radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, "read-reg", &reg); + out[i] = reg; + } +} + static unsigned hash_fd(void *key) { int fd = pointer_to_intptr(key); @@ -728,6 +743,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) ws->base.surface_init = radeon_drm_winsys_surface_init; ws->base.surface_best = radeon_drm_winsys_surface_best; ws->base.query_value = radeon_query_value; + ws->base.read_registers = radeon_read_registers;
radeon_bomgr_init_functions(ws); radeon_drm_cs_init_functions(ws); diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 996a201..ee0a904 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -589,6 +589,9 @@ struct radeon_winsys { uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); + + void (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, + unsigned num_registers, uint32_t *out); }; From mareko at kemper.freedesktop.org Tue Apr 28 14:06:02 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 07:06:02 -0700 (PDT) Subject: Mesa (master): r600g,radeonsi: add driver queries for GPU temperature and shader+memory clocks Message-ID: <20150428140602.922F4761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 0b8e73a6ae2a77d0e9a7810cca5b181ba5f0893e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0b8e73a6ae2a77d0e9a7810cca5b181ba5f0893e Author: Marek Olšák Date: Tue Feb 24 00:50:20 2015 +0100 r600g,radeonsi: add driver queries for GPU temperature and shader+memory clocks Reviewed-by: Alex Deucher --- src/gallium/drivers/radeon/r600_pipe_common.c | 13 +++++++++++-- src/gallium/drivers/radeon/r600_pipe_common.h | 3 +++ src/gallium/drivers/radeon/r600_query.c | 21 +++++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 19 +++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 5 ++++- 5 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index c6d7918..97eed13 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -665,12 +665,21 @@ static int r600_get_driver_query_info(struct pipe_screen *screen, {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, 0, TRUE}, {"VRAM-usage",
R600_QUERY_VRAM_USAGE, rscreen->info.vram_size, TRUE}, {"GTT-usage", R600_QUERY_GTT_USAGE, rscreen->info.gart_size, TRUE}, + {"temperature", R600_QUERY_GPU_TEMPERATURE, 100, FALSE}, + {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, 0, FALSE}, + {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, 0, FALSE} }; + unsigned num_queries; + + if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) + num_queries = Elements(list); + else + num_queries = 8; if (!info) - return Elements(list); + return num_queries; - if (index >= Elements(list)) + if (index >= num_queries) return 0; *info = list[index]; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 384a9a6..c23072c 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -55,6 +55,9 @@ #define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5) #define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6) #define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7) +#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8) +#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10) #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) #define R600_CONTEXT_PRIVATE_FLAG (1u << 1) diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 6a35ab8..758064a 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -86,6 +86,9 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c case R600_QUERY_NUM_BYTES_MOVED: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: return NULL; } @@ -382,6 +385,9 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q case 
R600_QUERY_NUM_BYTES_MOVED: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: skip_allocation = true; break; default: @@ -439,6 +445,9 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) case R600_QUERY_REQUESTED_GTT: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: rquery->begin_result = 0; return; case R600_QUERY_BUFFER_WAIT_TIME: @@ -513,6 +522,15 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) case R600_QUERY_GTT_USAGE: rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE); return; + case R600_QUERY_GPU_TEMPERATURE: + rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000; + return; + case R600_QUERY_CURRENT_GPU_SCLK: + rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000; + return; + case R600_QUERY_CURRENT_GPU_MCLK: + rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000; + return; } r600_emit_query_end(rctx, rquery); @@ -570,6 +588,9 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx, case R600_QUERY_NUM_BYTES_MOVED: case R600_QUERY_VRAM_USAGE: case R600_QUERY_GTT_USAGE: + case R600_QUERY_GPU_TEMPERATURE: + case R600_QUERY_CURRENT_GPU_SCLK: + case R600_QUERY_CURRENT_GPU_MCLK: result->u64 = query->end_result - query->begin_result; return TRUE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 2b12f4d..b3e3cb2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -49,6 +49,13 @@ #define RADEON_INFO_ACTIVE_CU_COUNT 0x20 #endif +#ifndef RADEON_INFO_CURRENT_GPU_TEMP +#define RADEON_INFO_CURRENT_GPU_TEMP 
0x21 +#define RADEON_INFO_CURRENT_GPU_SCLK 0x22 +#define RADEON_INFO_CURRENT_GPU_MCLK 0x23 +#define RADEON_INFO_READ_REG 0x24 +#endif + static struct util_hash_table *fd_tab = NULL; pipe_static_mutex(fd_tab_mutex); @@ -559,6 +566,18 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws, radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE, "gtt-usage", (uint32_t*)&retval); return retval; + case RADEON_GPU_TEMPERATURE: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP, + "gpu-temp", (uint32_t*)&retval); + return retval; + case RADEON_CURRENT_SCLK: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK, + "current-gpu-sclk", (uint32_t*)&retval); + return retval; + case RADEON_CURRENT_MCLK: + radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, + "current-gpu-mclk", (uint32_t*)&retval); + return retval; } return 0; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index a8cc60a..996a201 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -169,7 +169,10 @@ enum radeon_value_id { RADEON_NUM_CS_FLUSHES, RADEON_NUM_BYTES_MOVED, RADEON_VRAM_USAGE, - RADEON_GTT_USAGE + RADEON_GTT_USAGE, + RADEON_GPU_TEMPERATURE, + RADEON_CURRENT_SCLK, + RADEON_CURRENT_MCLK }; enum radeon_bo_priority { From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): gallium/util: get h264 level based on number of max references and resolution Message-ID: <20150428154501.38B19761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4509fc8b94a0dcf8a847e1f885685a4d2d660c5a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4509fc8b94a0dcf8a847e1f885685a4d2d660c5a Author: Leo Liu Date: Thu Mar 12 14:01:52 2015 -0400 gallium/util: get h264 level based on number of max references and resolution v2: add commments for 
limitation of max references numbers, and what the calculation is based Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/auxiliary/util/u_video.h | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h index 45b2d6e..b4743d1 100644 --- a/src/gallium/auxiliary/util/u_video.h +++ b/src/gallium/auxiliary/util/u_video.h @@ -38,6 +38,7 @@ extern "C" { /* u_reduce_video_profile() needs these */ #include "pipe/p_compiler.h" #include "util/u_debug.h" +#include "util/u_math.h" static INLINE enum pipe_video_format u_reduce_video_profile(enum pipe_video_profile profile) @@ -146,6 +147,41 @@ u_copy_swap422_packed(void *const *destination_data, } } +static INLINE uint32_t +u_get_h264_level(uint32_t width, uint32_t height, uint32_t *max_reference) +{ + uint32_t max_dpb_mbs; + + width = align(width, 16); + height = align(height, 16); + + /* Max references will be used for caculation of number of DPB buffers + in the UVD driver, limitation of max references is 16. Some client + like mpv application for VA-API, it requires references more than that, + so we have to set max of references to 16 here.
*/ + *max_reference = MIN2(*max_reference, 16); + max_dpb_mbs = (width / 16) * (height / 16) * *max_reference; + + /* The calculation is based on "Decoded picture buffering" section + from http://en.wikipedia.org/wiki/H.264/MPEG-4_AVC */ + if (max_dpb_mbs <= 8100) + return 30; + else if (max_dpb_mbs <= 18000) + return 31; + else if (max_dpb_mbs <= 20480) + return 32; + else if (max_dpb_mbs <= 32768) + return 41; + else if (max_dpb_mbs <= 34816) + return 42; + else if (max_dpb_mbs <= 110400) + return 50; + else if (max_dpb_mbs <= 184320) + return 51; + else + return 52; +} + #ifdef __cplusplus } #endif From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): st/vdpau: add h264 decoder level support Message-ID: <20150428154501.431E1761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d043b51ba47688044b1a09a6023093a90ad62e6a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d043b51ba47688044b1a09a6023093a90ad62e6a Author: Leo Liu Date: Thu Mar 12 14:09:49 2015 -0400 st/vdpau: add h264 decoder level support Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/state_trackers/vdpau/decode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c index 767d311..0634ba7 100644 --- a/src/gallium/state_trackers/vdpau/decode.c +++ b/src/gallium/state_trackers/vdpau/decode.c @@ -118,6 +118,11 @@ vlVdpDecoderCreate(VdpDevice device, templat.height = height; templat.max_references = max_references; + if (u_reduce_video_profile(templat.profile) == + PIPE_VIDEO_FORMAT_MPEG4_AVC) + templat.level = u_get_h264_level(templat.width, templat.height, + &templat.max_references); + vldecoder->decoder = pipe->create_video_codec(pipe, &templat); if (!vldecoder->decoder) { From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at
kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): st/va: add h264 decoder level support Message-ID: <20150428154501.724EC76348@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2d4a890c0b763ae3feb7af9255d6d92baa9cfe57 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d4a890c0b763ae3feb7af9255d6d92baa9cfe57 Author: Leo Liu Date: Thu Mar 12 14:29:21 2015 -0400 st/va: add h264 decoder level support Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/state_trackers/va/context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index a7a55f9..0a733b3 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -167,6 +167,11 @@ vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width, templat.max_references = num_render_targets; templat.expect_chunked_decode = true; + if (u_reduce_video_profile(templat.profile) == + PIPE_VIDEO_FORMAT_MPEG4_AVC) + templat.level = u_get_h264_level(templat.width, templat.height, + &templat.max_references); + context->decoder = drv->pipe->create_video_codec(drv->pipe, &templat); if (!context->decoder) { FREE(context); From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): vl: add level idc in sps Message-ID: <20150428154501.5D5F47626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1a5e2bb5ce254eb272960e5b50b803110f25c9b1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1a5e2bb5ce254eb272960e5b50b803110f25c9b1 Author: Leo Liu Date: Mon Mar 16 15:06:30 2015 -0400 vl: add level idc in sps Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/include/pipe/p_video_state.h | 1 + 1 file changed, 1 insertion(+) diff --git
a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index 6621dbd..3713cd9 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -271,6 +271,7 @@ struct pipe_vc1_picture_desc struct pipe_h264_sps { + uint8_t level_idc; uint8_t chroma_format_idc; uint8_t separate_colour_plane_flag; uint8_t bit_depth_luma_minus8; From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): st/omx/dec: separate create_video_codec to different codecs Message-ID: <20150428154501.4FD7576250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: ef1ae703a96fef72d2d8c22ef76bd1dfc41d1cee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef1ae703a96fef72d2d8c22ef76bd1dfc41d1cee Author: Leo Liu Date: Fri Mar 13 12:25:42 2015 -0400 st/omx/dec: separate create_video_codec to different codecs v2: get frame size from port info Signed-off-by: Leo Liu Reviewed-by: Christian König --- src/gallium/state_trackers/omx/vid_dec.c | 18 ------------------ src/gallium/state_trackers/omx/vid_dec.h | 1 + src/gallium/state_trackers/omx/vid_dec_h264.c | 15 +++++++++++++++ src/gallium/state_trackers/omx/vid_dec_mpeg12.c | 14 ++++++++++++++ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/gallium/state_trackers/omx/vid_dec.c b/src/gallium/state_trackers/omx/vid_dec.c index 13f4f55..9e7e7ba 100644 --- a/src/gallium/state_trackers/omx/vid_dec.c +++ b/src/gallium/state_trackers/omx/vid_dec.c @@ -44,8 +44,6 @@ #include #endif -#include - #include "pipe/p_screen.h" #include "pipe/p_video_codec.h" #include "util/u_memory.h" @@ -364,22 +362,6 @@ static OMX_ERRORTYPE vid_dec_MessageHandler(OMX_COMPONENTTYPE* comp, internalReq if (msg->messageType == OMX_CommandStateSet) { if ((msg->messageParam == OMX_StateIdle ) && (priv->state == OMX_StateLoaded)) { - - struct pipe_video_codec templat =
{}; - omx_base_video_PortType *port; - - port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; - - templat.profile = priv->profile; - templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; - templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; - templat.width = port->sPortParam.format.video.nFrameWidth; - templat.height = port->sPortParam.format.video.nFrameHeight; - templat.max_references = 2; - templat.expect_chunked_decode = true; - - priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); - if (priv->profile == PIPE_VIDEO_PROFILE_MPEG2_MAIN) vid_dec_mpeg12_Init(priv); else if (priv->profile == PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH) diff --git a/src/gallium/state_trackers/omx/vid_dec.h b/src/gallium/state_trackers/omx/vid_dec.h index 9acf872..1c51f9c 100644 --- a/src/gallium/state_trackers/omx/vid_dec.h +++ b/src/gallium/state_trackers/omx/vid_dec.h @@ -44,6 +44,7 @@ #include #include +#include #include "pipe/p_video_state.h" #include "state_tracker/drm_driver.h" diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c index e01e873..7c90dee 100644 --- a/src/gallium/state_trackers/omx/vid_dec_h264.c +++ b/src/gallium/state_trackers/omx/vid_dec_h264.c @@ -105,6 +105,21 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv) priv->picture.h264.num_ref_frames = priv->picture.h264.pps->sps->max_num_ref_frames; + if (!priv->codec) { + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + templat.profile = priv->profile; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.max_references = 2; + templat.expect_chunked_decode = true; + templat.width = port->sPortParam.format.video.nFrameWidth; + templat.height = port->sPortParam.format.video.nFrameHeight; + + priv->codec = 
priv->pipe->create_video_codec(priv->pipe, &templat); + } priv->codec->begin_frame(priv->codec, priv->target, &priv->picture.base); priv->frame_started = true; } diff --git a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c index de4c69a..bef83ec 100644 --- a/src/gallium/state_trackers/omx/vid_dec_mpeg12.c +++ b/src/gallium/state_trackers/omx/vid_dec_mpeg12.c @@ -65,6 +65,20 @@ static struct pipe_video_buffer *vid_dec_mpeg12_Flush(vid_dec_PrivateType *priv) void vid_dec_mpeg12_Init(vid_dec_PrivateType *priv) { + struct pipe_video_codec templat = {}; + omx_base_video_PortType *port; + + port = (omx_base_video_PortType *)priv->ports[OMX_BASE_FILTER_INPUTPORT_INDEX]; + templat.profile = priv->profile; + templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; + templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; + templat.max_references = 2; + templat.expect_chunked_decode = true; + templat.width = port->sPortParam.format.video.nFrameWidth; + templat.height = port->sPortParam.format.video.nFrameHeight; + + priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); + priv->picture.base.profile = PIPE_VIDEO_PROFILE_MPEG2_MAIN; priv->picture.mpeg12.intra_matrix = default_intra_matrix; priv->picture.mpeg12.non_intra_matrix = default_non_intra_matrix; From mareko at kemper.freedesktop.org Tue Apr 28 15:45:01 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Tue, 28 Apr 2015 08:45:01 -0700 (PDT) Subject: Mesa (master): st/omx/dec: add h264 decoder level support Message-ID: <20150428154501.670A376347@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b2596efeb767417f21ce45a411c56bb3ecc51620 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2596efeb767417f21ce45a411c56bb3ecc51620 Author: Leo Liu Date: Fri Mar 13 12:39:26 2015 -0400 st/omx/dec: add h264 decoder level support v2: use sps level idc as level to driver Signed-off-by: Leo Liu Reviewed-by: Christian König ---
src/gallium/state_trackers/omx/vid_dec_h264.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c index 7c90dee..18d8803 100644 --- a/src/gallium/state_trackers/omx/vid_dec_h264.c +++ b/src/gallium/state_trackers/omx/vid_dec_h264.c @@ -33,6 +33,7 @@ #include "pipe/p_video_codec.h" #include "util/u_memory.h" +#include "util/u_video.h" #include "vl/vl_rbsp.h" #include "entrypoint.h" @@ -113,10 +114,11 @@ static void vid_dec_h264_BeginFrame(vid_dec_PrivateType *priv) templat.profile = priv->profile; templat.entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM; templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420; - templat.max_references = 2; + templat.max_references = priv->picture.h264.num_ref_frames; templat.expect_chunked_decode = true; templat.width = port->sPortParam.format.video.nFrameWidth; templat.height = port->sPortParam.format.video.nFrameHeight; + templat.level = priv->picture.h264.pps->sps->level_idc; priv->codec = priv->pipe->create_video_codec(priv->pipe, &templat); } @@ -239,7 +241,7 @@ static struct pipe_h264_sps *seq_parameter_set_id(vid_dec_PrivateType *priv, str static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) { struct pipe_h264_sps *sps; - unsigned profile_idc; + unsigned profile_idc, level_idc; unsigned i; /* Sequence parameter set */ @@ -267,7 +269,7 @@ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) vl_rbsp_u(rbsp, 2); /* level_idc */ - vl_rbsp_u(rbsp, 8); + level_idc = vl_rbsp_u(rbsp, 8); sps = seq_parameter_set_id(priv, rbsp); if (!sps) @@ -277,6 +279,8 @@ static void seq_parameter_set(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp) memset(sps->ScalingList4x4, 16, sizeof(sps->ScalingList4x4)); memset(sps->ScalingList8x8, 16, sizeof(sps->ScalingList8x8)); + sps->level_idc = level_idc; + if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 
244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || profile_idc == 138) { From imirkin at kemper.freedesktop.org Tue Apr 28 16:49:37 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Tue, 28 Apr 2015 09:49:37 -0700 (PDT) Subject: Mesa (master): st/mesa: allow glsl version up to 410, enable ARB_shader_precision Message-ID: <20150428164937.EF873761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b5947984cd7c90bf1409e15a2425fa1d4dc2be15 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5947984cd7c90bf1409e15a2425fa1d4dc2be15 Author: Ilia Mirkin Date: Sun Apr 26 16:14:36 2015 -0400 st/mesa: allow glsl version up to 410, enable ARB_shader_precision Signed-off-by: Ilia Mirkin Reviewed-by: Marek Olšák --- src/mesa/state_tracker/st_extensions.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 25932dd..82e4a30 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -657,8 +657,8 @@ void st_init_extensions(struct pipe_screen *screen, glsl_feature_level = screen->get_param(screen, PIPE_CAP_GLSL_FEATURE_LEVEL); consts->GLSLVersion = glsl_feature_level; - if (glsl_feature_level >= 330) - consts->GLSLVersion = 330; + if (glsl_feature_level >= 410) + consts->GLSLVersion = 410; _mesa_override_glsl_version(consts); @@ -669,6 +669,8 @@ void st_init_extensions(struct pipe_screen *screen, if (glsl_feature_level >= 400) extensions->ARB_gpu_shader5 = GL_TRUE; + if (glsl_feature_level >= 410) + extensions->ARB_shader_precision = GL_TRUE; /* This extension needs full OpenGL 3.2, but we don't know if that's * supported at this point. Only check the GLSL version. 
*/ From imirkin at kemper.freedesktop.org Tue Apr 28 16:49:38 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Tue, 28 Apr 2015 09:49:38 -0700 (PDT) Subject: Mesa (master): nvc0: expose GLSL version 410 Message-ID: <20150428164938.03AF1761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e312a6995850e78b3b9e2cbe4713928bc9cc386d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e312a6995850e78b3b9e2cbe4713928bc9cc386d Author: Ilia Mirkin Date: Sun Apr 26 16:15:02 2015 -0400 nvc0: expose GLSL version 410 Signed-off-by: Ilia Mirkin --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 04c34f5..7a9f649 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -89,7 +89,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: return 65536; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 400; + return 410; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: From brianp at kemper.freedesktop.org Tue Apr 28 18:48:36 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Tue, 28 Apr 2015 11:48:36 -0700 (PDT) Subject: Mesa (master): mesa: remove unneeded #include colortab.h Message-ID: <20150428184836.48FBE76250@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7e8de8219fbde486364054cc2a75022ddcbe0c2c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7e8de8219fbde486364054cc2a75022ddcbe0c2c Author: Brian Paul Date: Fri Apr 24 20:12:42 2015 -0600 mesa: remove unneeded #include colortab.h Reviewed-by: Anuj Phogat --- src/mesa/main/texobj.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index e018ab9..c563f1e 100644 --- a/src/mesa/main/texobj.c +++ 
b/src/mesa/main/texobj.c @@ -30,7 +30,6 @@ #include #include "bufferobj.h" -#include "colortab.h" #include "context.h" #include "enums.h" #include "fbobject.h" From brianp at kemper.freedesktop.org Tue Apr 28 18:48:36 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Tue, 28 Apr 2015 11:48:36 -0700 (PDT) Subject: Mesa (master): mesa: remove unused options var in compile_shader() Message-ID: <20150428184836.3E558761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7c1be009b7c34dce55be83ae2a955dd8a37e230a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c1be009b7c34dce55be83ae2a955dd8a37e230a Author: Brian Paul Date: Fri Apr 24 20:04:29 2015 -0600 mesa: remove unused options var in compile_shader() Reviewed-by: Anuj Phogat --- src/mesa/main/shaderapi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index cc001ba..a04b287 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -861,14 +861,11 @@ static void compile_shader(struct gl_context *ctx, GLuint shaderObj) { struct gl_shader *sh; - struct gl_shader_compiler_options *options; sh = _mesa_lookup_shader_err(ctx, shaderObj, "glCompileShader"); if (!sh) return; - options = &ctx->Const.ShaderCompilerOptions[sh->Stage]; - if (!sh->Source) { /* If the user called glCompileShader without first calling * glShaderSource, we should fail to compile, but not raise a GL_ERROR. 
From brianp at kemper.freedesktop.org Tue Apr 28 18:48:36 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Tue, 28 Apr 2015 11:48:36 -0700 (PDT) Subject: Mesa (master): meta: remove unneeded #include colortab.h Message-ID: <20150428184836.5E4CF7626E@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 66985d2a6de0afd9446c92dce999f00efa528d31 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=66985d2a6de0afd9446c92dce999f00efa528d31 Author: Brian Paul Date: Fri Apr 24 20:12:50 2015 -0600 meta: remove unneeded #include colortab.h Reviewed-by: Anuj Phogat --- src/mesa/drivers/common/meta.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index cf99d95..d2ab7b8 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -41,7 +41,6 @@ #include "main/bufferobj.h" #include "main/buffers.h" #include "main/clear.h" -#include "main/colortab.h" #include "main/condrender.h" #include "main/depth.h" #include "main/enable.h" From brianp at kemper.freedesktop.org Tue Apr 28 18:48:36 2015 From: brianp at kemper.freedesktop.org (Brian Paul) Date: Tue, 28 Apr 2015 11:48:36 -0700 (PDT) Subject: Mesa (master): docs: more details about Viewperf 12 medical-01 test issues Message-ID: <20150428184836.35B3D761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3597a0de94fa721e86ef2b7a68855f44ed4e1973 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3597a0de94fa721e86ef2b7a68855f44ed4e1973 Author: Brian Paul Date: Thu Apr 23 10:00:34 2015 -0600 docs: more details about Viewperf 12 medical-01 test issues --- docs/viewperf.html | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/viewperf.html b/docs/viewperf.html index 6b63b94..a60a44f 100644 --- a/docs/viewperf.html +++ b/docs/viewperf.html @@ -329,6 +329,13 @@ array initializer statement, but it neglects to specify #version 120 at the top of the shader code. 
So, the shader does not compile and all that's rendered is plain white polygons.

      +

      +Also, the test tries to create a very large 3D texture that may exceed +the device driver's limit. +When this happens, the glTexImage3D call fails and all that's rendered is +a white box. +

      +

      showcase-01

      From imirkin at kemper.freedesktop.org Wed Apr 29 00:17:43 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Tue, 28 Apr 2015 17:17:43 -0700 (PDT) Subject: Mesa (master): nvc0/ir: flush denorms to zero in non-compute shaders Message-ID: <20150429001743.E5135761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 6fe0d4f0354418c6e68dd352996e9891ddd4dfd6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6fe0d4f0354418c6e68dd352996e9891ddd4dfd6 Author: Ilia Mirkin Date: Tue Apr 28 03:30:08 2015 -0400 nvc0/ir: flush denorms to zero in non-compute shaders This will set the FTZ flag (flush denorms to zero) on all opcodes that can take it. This resolves issues in Unigine Heaven 4.0 where there were solid-filled boxes popping up. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89455 Cc: "10.4 10.5" Signed-off-by: Ilia Mirkin --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 25 +++++++++++++++++++- .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 18e8e67..b61f3c4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -96,6 +96,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); } +void +NVC0LegalizeSSA::handleFTZ(Instruction *i) +{ + // Only want to flush float inputs + if (i->sType != TYPE_F32) + return; + + // If we're already flushing denorms (and NaN's) to zero, no need for this. 
+ if (i->dnz) + return; + + // Only certain classes of operations can flush + OpClass cls = prog->getTarget()->getOpClass(i->op); + if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE && + cls != OPCLASS_CONVERT) + return; + + i->ftz = true; +} + bool NVC0LegalizeSSA::visit(Function *fn) { @@ -109,8 +129,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) Instruction *next; for (Instruction *i = bb->getEntry(); i; i = next) { next = i->next; - if (i->dType == TYPE_F32) + if (i->dType == TYPE_F32) { + if (prog->getType() != Program::TYPE_COMPUTE) + handleFTZ(i); continue; + } switch (i->op) { case OP_DIV: case OP_MOD: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index d8ff5cd..260e101 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -36,6 +36,7 @@ private: // we want to insert calls to the builtin library only after optimization void handleDIV(Instruction *); // integer division, modulus void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt + void handleFTZ(Instruction *); private: BuildUtil bld; From airlied at kemper.freedesktop.org Wed Apr 29 05:13:33 2015 From: airlied at kemper.freedesktop.org (Dave Airlie) Date: Tue, 28 Apr 2015 22:13:33 -0700 (PDT) Subject: Mesa (master): egl: misc fixes for EGL_MESA_image_dma_buf_export Message-ID: <20150429051333.436C6761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c66c158e59298fc4183148c466dd4eecb945f87c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c66c158e59298fc4183148c466dd4eecb945f87c Author: Marc-Andr? 
Lureau Date: Tue Apr 28 12:23:42 2015 +0200 egl: misc fixes for EGL_MESA_image_dma_buf_export Fix define and a function argument name introduced in commit 8f7338f284cdb1fef64c85e3293d2200d0cc6387 Signed-off-by: Dave Airlie --- src/egl/main/eglapi.c | 2 +- src/egl/main/eglapi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index ea2ee73..8b7b9be 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1241,7 +1241,7 @@ eglGetProcAddress(const char *procname) { "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT }, { "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT }, { "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM }, -#ifdef EGL_MESA_drm_buf_image_export +#ifdef EGL_MESA_dma_buf_image_export { "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA }, { "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA }, #endif diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index eb5f58e..3245327 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -141,7 +141,7 @@ typedef EGLBoolean (*SwapBuffersWithDamageEXT_t) (_EGLDriver *drv, _EGLDisplay * typedef EGLBoolean (*GetSyncValuesCHROMIUM_t) (_EGLDisplay *dpy, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc); #ifdef EGL_MESA_image_dma_buf_export -typedef EGLBoolean (*ExportDMABUFImageQueryMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fourcc, EGLint *stride, EGLuint64MESA *modifiers); +typedef EGLBoolean (*ExportDMABUFImageQueryMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fourcc, EGLint *nplanes, EGLuint64MESA *modifiers); typedef EGLBoolean (*ExportDMABUFImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fds, EGLint *strides, EGLint *offsets); #endif From jrfonseca at kemper.freedesktop.org Wed Apr 29 05:49:41 2015 From: 
jrfonseca at kemper.freedesktop.org (Jose Fonseca) Date: Tue, 28 Apr 2015 22:49:41 -0700 (PDT) Subject: Mesa (master): mesa: Fix glGetProgramiv(GL_ACTIVE_ATTRIBUTES). Message-ID: <20150429054941.459FC761E8@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 114ac39a888509b133f15ddae813fcf64adc72a7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=114ac39a888509b133f15ddae813fcf64adc72a7 Author: Jose Fonseca Date: Tue Apr 28 21:49:36 2015 +0100 mesa: Fix glGetProgramiv(GL_ACTIVE_ATTRIBUTES). It's returning random values, because RESOURCE_VAR() is casting different objects into ir_variable pointers. This updates _mesa_count_active_attribs to filter the resources with the same logic used in _mesa_longest_attribute_name_length. https://bugs.freedesktop.org/show_bug.cgi?id=90207 Reviewed-by: Tapani Pälli --- src/mesa/main/shader_query.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index a84ec84..d2ca49b 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -302,8 +302,10 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg) struct gl_program_resource *res = shProg->ProgramResourceList; unsigned count = 0; for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) { - if (is_active_attrib(RESOURCE_VAR(res))) - count++; + if (res->Type == GL_PROGRAM_INPUT && + res->StageReferences & (1 << MESA_SHADER_VERTEX) && + is_active_attrib(RESOURCE_VAR(res))) + count++; } return count; } From axeldavy at kemper.freedesktop.org Wed Apr 29 06:29:21 2015 From: axeldavy at kemper.freedesktop.org (Axel Davy) Date: Tue, 28 Apr 2015 23:29:21 -0700 (PDT) Subject: Mesa (master): 29 new commits Message-ID: <20150429062921.8FB11761E8@kemper.freedesktop.org> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=231be57ee2515428e3de31f514effe9cf06bcda0 Author: Axel Davy Date: Fri Feb 20 12:59:45 2015 +0100 st/nine: Remove Managed texture 
hack. Previously binding an uninitialized managed texture was causing a crash, and a workaround was added to prevent the crash. This patch removes this workaround and instead sets the initial state of managed textures as dirty, so that when the texture is bound for the first time, it is always initialized. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=58d295d41e30434c570248eeee73af2006b79eea Author: Axel Davy Date: Fri Feb 20 12:34:47 2015 +0100 st/nine: Enforce LOD 0 for D3DUSAGE_AUTOGENMIPMAP For D3DUSAGE_AUTOGENMIPMAP textures, applications can only lock/copy from/get surface descriptor for/etc the first level. Thus it makes sense to restrict the LOD to 0, and use only the first level to generate the sublevels. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f57e014365563c0dcc32451401a76212abf0a54 Author: Axel Davy Date: Fri Feb 20 12:11:56 2015 +0100 st/nine: Some D3DUSAGE_AUTOGENMIPMAP fixes Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=24eca6a30dea4cde0b0528b85e83d45a6430762e Author: Axel Davy Date: Thu Feb 19 22:28:37 2015 +0100 st/nine: util_gen_mipmap doesn't need we reset states. util_gen_mipmap uses pipe->blit, and thus we don't need to restore all states after using it. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a7758c5525fa90bf7c546acc58a9ea41f7d320b Author: Axel Davy Date: Thu Feb 19 20:55:42 2015 +0100 st/nine: D3DUSAGE_AUTOGENMIPMAP is forbidden for volumes Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec411d9b74f9aca941b27fafeae1416df0f9eaec Author: Axel Davy Date: Thu Feb 19 20:23:06 2015 +0100 st/nine: Fix NineBaseTexture9_PreLoad It wasn't uploading the texture when the lod had changed. 
Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b45fa97a22d091a5c9a3d9c9432ff880e79231d6 Author: Axel Davy Date: Thu Feb 19 19:34:02 2015 +0100 st/nine: Rewrite Managed texture uploads That part of the code was quite obscure. This new implementation tries to make it clearer by separating the different parts, and commenting more. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=090ebc7638e64592716ac4ecd4c29609b2f35421 Author: Axel Davy Date: Thu Feb 19 17:44:43 2015 +0100 st/nine: Bound the dirty regions to resource size Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=520e36f89cb9048258d9c1ca42de68564ad876a9 Author: Axel Davy Date: Thu Feb 19 17:34:45 2015 +0100 st/nine: Simplify Surface9 Managed resources implementation Remove the Surface9 code for dirty rects, used only for Managed resources. Instead convey the information to the parent texture. According to documentation, this seems to be the expected behaviour, and if documentation is wrong there, that's not a problem since it can only lead to more texture updates in corner cases. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4c2247ac60f426a6441f964f2204af9f8dc04dc5 Author: Axel Davy Date: Thu Feb 19 16:35:45 2015 +0100 st/nine: Remove impossible cases with Managed textures Copying to/from a Managed texture is forbidden. Rendering to a Managed texture is forbidden. 
Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e558ce98f2b01e11ff79c29efaea2877121ae04a Author: Axel Davy Date: Thu Feb 19 16:18:00 2015 +0100 st/nine: Encapsulate variables for MANAGED resource Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=35fe920e1ec877d487e5dd33c9aea7e1ec1dbe11 Author: Axel Davy Date: Thu Feb 19 11:21:12 2015 +0100 st/nine: Rework texture data allocation Some applications assume the memory for multilevel textures is allocated per continuous blocks. This patch implements that behaviour. v2: cache offsets Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=54f8e8a18da58c85a2f515d5fd0552fa4f5547bb Author: Axel Davy Date: Mon Mar 23 23:31:18 2015 +0100 st/nine: Fix update_vertex_elements bad rebase This code was supposed to be removed, but a rebase seems to have made it stay. Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=87868d38328a3875881fe1ca6861eb7816a5b0cf Author: Axel Davy Date: Sun Mar 22 11:49:03 2015 +0100 st/nine: Add debug warning when application uses sw processing Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4acbf420d1101bd32e8f23acadff5fe6c6fa9b26 Author: Axel Davy Date: Fri Feb 20 18:25:44 2015 +0100 st/nine: Rework update_vertex_buffers Previous code was trying to optimise to call set_vertex_buffers on big packets, and thus avoids as many calls as possible. However in practice doing so won't be faster (drivers implement set_vertex_buffers by a loop over the buffers we want to bind) When we want to unbind a buffer, we were calling set_vertex_buffers on a buffer with vtxbuf->buffer = NULL. It works on some drivers, but not on all of them, because it isn't in Gallium spec. This patch fixes that. 
Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5beb411bf753da4c86082c505929bcc9629c666c Author: Xavier Bouchoux Date: Mon Feb 16 09:43:23 2015 +0100 st/nine: Fix computation of const_used_size Was sometimes too large for PS. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=559342d01d4199e8fd475241a574a92c4ce63896 Author: Axel Davy Date: Sun Apr 26 23:19:03 2015 +0200 gallium/svga: Remove useless ARRAY_SIZE declaration This is already declared in util/macros.h Reviewed-by: Brian Paul Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=64880d073ab21ae1abad0c049ea2d6a1169a3cfa Author: Axel Davy Date: Sun Apr 26 23:17:45 2015 +0200 util/macros: Move DIV_ROUND_UP to util/macros.h Move DIV_ROUND_UP to a shared location accessible everywhere Reviewed-by: Brian Paul Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=405c7d75114fadcf744f6bbd5556c86c66ac631b Author: Xavier Bouchoux Date: Sat Feb 21 19:58:38 2015 +0100 st/nine: Fix behaviour of D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING Ignore D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING when D3DUSAGE_RENDERTARGET is not specified. This behaviour matches windows drivers. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d838fe8243aa0e96b198c876c1b5d79c098d1c0d Author: Xavier Bouchoux Date: Sat Feb 21 19:56:40 2015 +0100 st/nine: Improve D3DQUERYTYPE_TIMESTAMP Avoid blocking when retrieving D3DQUERYTYPE_TIMESTAMP result with NineQuery9_GetData(), when D3DGETDATA_FLUSH is not specified. This mimics Win behaviour and gives slightly better performance for some games. 
Reviewed-by: Ilia Mirkin Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=851abb91459b13d28303f54605b8da51c24efcb7 Author: Xavier Bouchoux Date: Sun Feb 8 15:56:15 2015 +0100 st/nine: Fix D3DQUERYTYPE_TIMESTAMPFREQ query D3DQUERYTYPE_TIMESTAMPFREQ is supposed to give the frequency at which the clock of D3DQUERYTYPE_TIMESTAMP runs. PIPE_QUERY_TIMESTAMP returns a value in ns, thus the corresponding frequency is 1000000000. PIPE_QUERY_TIMESTAMP_DISJOINT returns the frequency at which PIPE_QUERY_TIMESTAMP value is updated. It isn't always 1000000000. Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=31bb4cd5c6e3d696b2f240c270a257abfcd8848f Author: Tiziano Bacocco Date: Sun Jan 25 12:15:39 2015 +0100 st/nine: Change x86 FPU Control word on device creation As on wined3d and windows, when D3DCREATE_FPU_PRESERVE is not specified, change the fpu control word to all exceptions masked, single precision, round to nearest. Signed-off-by: Axel Davy Signed-off-by: Tiziano Bacocco URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7b1a1e57cdfd8d019ba0ff4cdc2c7239066869f Author: Axel Davy Date: Fri Feb 20 12:51:55 2015 +0100 st/nine: Do not advertise D3DDEVCAPS_TEXTURESYSTEMMEMORY No major vendor advertises it, and we weren't supporting it. 
Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=907f28f87e3858028bce4477f0b57f7e5d76060a Author: Axel Davy Date: Thu Apr 23 21:46:24 2015 +0200 st/nine: Fix comment in update_viewport Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6e825b69bd4fb163cba0a565616ed966fb1a8929 Author: Axel Davy Date: Thu Apr 23 21:46:09 2015 +0200 st/nine: Workaround barycentrics issue on some cards Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3fd06e94d29172a5de68594d3a6433f91a41362 Author: Xavier Bouchoux Date: Mon Feb 16 10:02:42 2015 +0100 st/nine: Clear struct pipe_blit_info before use. render_condition_enable was uninitialized. Reviewed-by: Ilia Mirkin Reviewed-by: Axel Davy Signed-off-by: Xavier Bouchoux URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=77a38d2088678fa756aca61592777f12e245ac0c Author: Patrick Rudolph Date: Fri Feb 20 15:48:57 2015 +0100 st/nine: NineDevice9_Clear skip fastpath for bigger depth-buffers This adds an additional check to make sure the bound depth-buffer doesn't exceed the rendertarget size when clearing depth and color buffer at once. D3D9 clears only a rectangle with the same dimensions as the viewport, leaving other parts of the depth-buffer intact. 
This fixes failing WINE test visual.c:depth_buffer_test() Signed-off-by: Patrick Rudolph Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=716bef2643367d3696aa20ab7178cbe19bf78abc Author: Axel Davy Date: Sat Mar 21 13:36:25 2015 +0100 st/nine: Fix wrong assert in nine_shader The sampler src index was wrong for texldl and texldd Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8d3e063e6851ca6c33c8bac1d8e9b9b344d27294 Author: Axel Davy Date: Tue Dec 23 15:15:10 2014 +0100 st/nine: Handle special LIT case Reviewed-by: Ilia Mirkin Signed-off-by: Axel Davy From tstellar at kemper.freedesktop.org Wed Apr 29 13:52:42 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Wed, 29 Apr 2015 06:52:42 -0700 (PDT) Subject: Mesa (master): clover: compile all sources with c++11 Message-ID: <20150429135242.75460761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 3c61ff0d89da4a8cc921d131ce0c2480ddb111a0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c61ff0d89da4a8cc921d131ce0c2480ddb111a0 Author: EdB Date: Tue Apr 21 15:49:09 2015 +0200 clover: compile all sources with c++11 Later we can remove the compat code Reviewed-by: Francisco Jerez Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am index 62c13fa..f46d9ef 100644 --- a/src/gallium/state_trackers/clover/Makefile.am +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -35,12 +35,13 @@ endif noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la libcltgsi_la_CXXFLAGS = \ - -std=c++0x \ + -std=c++11 \ $(VISIBILITY_CXXFLAGS) libcltgsi_la_SOURCES = $(TGSI_SOURCES) libclllvm_la_CXXFLAGS = \ + -std=c++11 \ $(VISIBILITY_CXXFLAGS) \ $(LLVM_CXXFLAGS) \ $(DEFINES) \ From tstellar at kemper.freedesktop.org Wed Apr 29 13:52:42 2015 
From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Wed, 29 Apr 2015 06:52:42 -0700 (PDT) Subject: Mesa (master): clover: remove compat classes that match std one Message-ID: <20150429135242.7E31F761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1b4a1d0049646e574565bab38b8ae935c1c45fae URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1b4a1d0049646e574565bab38b8ae935c1c45fae Author: EdB Date: Fri Apr 24 12:59:54 2015 +0200 clover: remove compat classes that match std one Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/Makefile.sources | 1 - src/gallium/state_trackers/clover/api/program.cpp | 2 +- .../state_trackers/clover/core/compiler.hpp | 2 +- src/gallium/state_trackers/clover/core/error.hpp | 6 ++-- src/gallium/state_trackers/clover/util/compat.cpp | 38 -------------------- src/gallium/state_trackers/clover/util/compat.hpp | 27 -------------- 6 files changed, 6 insertions(+), 70 deletions(-) diff --git a/src/gallium/state_trackers/clover/Makefile.sources b/src/gallium/state_trackers/clover/Makefile.sources index 5b3344c..03eb754 100644 --- a/src/gallium/state_trackers/clover/Makefile.sources +++ b/src/gallium/state_trackers/clover/Makefile.sources @@ -45,7 +45,6 @@ CPP_SOURCES := \ util/adaptor.hpp \ util/algebra.hpp \ util/algorithm.hpp \ - util/compat.cpp \ util/compat.hpp \ util/factor.hpp \ util/functional.hpp \ diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index 60184ed..c985690 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -216,7 +216,7 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, throw error(CL_INVALID_OPERATION); if (!any_of(key_equals(name), headers)) - headers.push_back(compat::pair( + headers.push_back(std::pair( name, header.source())); }, range(header_names, num_headers), diff --git 
a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 7210d1e..bec8aac 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -29,7 +29,7 @@ #include "pipe/p_defines.h" namespace clover { - typedef compat::vector > header_map; module compile_program_llvm(const compat::string &source, diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 7b010f1..45a38c1 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -25,6 +25,8 @@ #include "CL/cl.h" +#include + #include "util/compat.hpp" namespace clover { @@ -50,10 +52,10 @@ namespace clover { /// Class that represents an error that can be converted to an /// OpenCL status code. /// - class error : public compat::runtime_error { + class error : public std::runtime_error { public: error(cl_int code, compat::string what = "") : - compat::runtime_error(what), code(code) { + std::runtime_error(what), code(code) { } cl_int get() const { diff --git a/src/gallium/state_trackers/clover/util/compat.cpp b/src/gallium/state_trackers/clover/util/compat.cpp deleted file mode 100644 index 80d5b3e..0000000 --- a/src/gallium/state_trackers/clover/util/compat.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// -// Copyright 2013 Francisco Jerez -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or 
substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// - -#include "util/compat.hpp" - -using namespace clover::compat; - -exception::~exception() { -} - -const char * -exception::what() const { - return ""; -} - -const char * -runtime_error::what() const { - return _what.c_str(); -} diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index 735994f..ea7d3a0 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -411,33 +411,6 @@ namespace clover { private: mutable vector v; }; - - template - struct pair { - pair(T first, S second) : - first(first), second(second) {} - - T first; - S second; - }; - - class exception { - public: - exception() {} - virtual ~exception(); - - virtual const char *what() const; - }; - - class runtime_error : public exception { - public: - runtime_error(const string &what) : _what(what) {} - - virtual const char *what() const; - - protected: - string _what; - }; } } From tstellar at kemper.freedesktop.org Wed Apr 29 13:52:42 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Wed, 29 Apr 2015 06:52:42 -0700 (PDT) Subject: Mesa (master): clover: remove compat::string Message-ID: <20150429135242.899B4761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5ca9b23319db66d9768d46c0a7504b7bb079164a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ca9b23319db66d9768d46c0a7504b7bb079164a Author: EdB Date: Fri Apr 24 
12:59:55 2015 +0200 clover: remove compat::string Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/api/program.cpp | 2 +- .../state_trackers/clover/core/compiler.hpp | 14 +-- src/gallium/state_trackers/clover/core/error.hpp | 4 +- src/gallium/state_trackers/clover/core/program.cpp | 2 +- .../state_trackers/clover/llvm/invocation.cpp | 22 ++--- .../state_trackers/clover/tgsi/compiler.cpp | 7 +- src/gallium/state_trackers/clover/util/compat.hpp | 104 -------------------- 7 files changed, 26 insertions(+), 129 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index c985690..b3be2b8 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -216,7 +216,7 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, throw error(CL_INVALID_OPERATION); if (!any_of(key_equals(name), headers)) - headers.push_back(std::pair( + headers.push_back(std::pair( name, header.source())); }, range(header_names, num_headers), diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index bec8aac..62c0f47 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -29,17 +29,17 @@ #include "pipe/p_defines.h" namespace clover { - typedef compat::vector > header_map; + typedef compat::vector > header_map; - module compile_program_llvm(const compat::string &source, + module compile_program_llvm(const std::string &source, const header_map &headers, pipe_shader_ir ir, - const compat::string &target, - const compat::string &opts, - compat::string &r_log); + const std::string &target, + const std::string &opts, + std::string &r_log); - module compile_program_tgsi(const compat::string &source); + module compile_program_tgsi(const std::string &source); } #endif diff --git 
a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 45a38c1..805a0ec 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -54,7 +54,7 @@ namespace clover { /// class error : public std::runtime_error { public: - error(cl_int code, compat::string what = "") : + error(cl_int code, std::string what = "") : std::runtime_error(what), code(code) { } @@ -68,7 +68,7 @@ namespace clover { class build_error : public error { public: - build_error(const compat::string &what = "") : + build_error(const std::string &what = "") : error(CL_COMPILE_PROGRAM_FAILURE, what) { } }; diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index c07548c..50ac01b 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -52,7 +52,7 @@ program::build(const ref_vector &devs, const char *opts, _opts.insert({ &dev, opts }); - compat::string log; + std::string log; try { auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ? 
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index e07d95b..2157909 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -136,7 +136,7 @@ namespace { const std::string &name, const std::string &triple, const std::string &processor, const std::string &opts, clang::LangAS::Map& address_spaces, unsigned &optimization_level, - compat::string &r_log) { + std::string &r_log) { clang::CompilerInstance c; clang::EmitLLVMOnlyAction act(&llvm_ctx); @@ -470,7 +470,7 @@ namespace { emit_code(LLVMTargetMachineRef tm, LLVMModuleRef mod, LLVMCodeGenFileType file_type, LLVMMemoryBufferRef *out_buffer, - compat::string &r_log) { + std::string &r_log) { LLVMBool err; char *err_message = NULL; @@ -491,7 +491,7 @@ namespace { std::vector compile_native(const llvm::Module *mod, const std::string &triple, const std::string &processor, unsigned dump_asm, - compat::string &r_log) { + std::string &r_log) { std::string log; LLVMTargetRef target; @@ -545,7 +545,7 @@ namespace { std::map get_kernel_offsets(std::vector &code, const std::vector &kernels, - compat::string &r_log) { + std::string &r_log) { // One of the libelf implementations // (http://www.mr511.de/software/english.htm) requires calling @@ -611,7 +611,7 @@ namespace { const llvm::Module *mod, const std::vector &kernels, const clang::LangAS::Map &address_spaces, - compat::string &r_log) { + std::string &r_log) { std::map kernel_offsets = get_kernel_offsets(code, kernels, r_log); @@ -641,12 +641,12 @@ namespace { void diagnostic_handler(const llvm::DiagnosticInfo &di, void *data) { if (di.getSeverity() == llvm::DS_Error) { - std::string message = *(compat::string*)data; + std::string message = *(std::string*)data; llvm::raw_string_ostream stream(message); llvm::DiagnosticPrinterRawOStream dp(stream); di.print(dp); stream.flush(); - *(compat::string*)data = message; + 
*(std::string*)data = message; throw build_error(); } @@ -686,12 +686,12 @@ namespace { } // End anonymous namespace module -clover::compile_program_llvm(const compat::string &source, +clover::compile_program_llvm(const std::string &source, const header_map &headers, enum pipe_shader_ir ir, - const compat::string &target, - const compat::string &opts, - compat::string &r_log) { + const std::string &target, + const std::string &opts, + std::string &r_log) { init_targets(); diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp b/src/gallium/state_trackers/clover/tgsi/compiler.cpp index 93dfeb5..9e959f7 100644 --- a/src/gallium/state_trackers/clover/tgsi/compiler.cpp +++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp @@ -88,11 +88,12 @@ namespace { } module -clover::compile_program_tgsi(const compat::string &source) { - const char *body = source.find("COMP\n"); +clover::compile_program_tgsi(const std::string &source) { + const size_t body_pos = source.find("COMP\n"); + const char *body = &source[body_pos]; module m; - read_header({ source.begin(), body }, m); + read_header({ source.begin(), source.begin() + body_pos }, m); read_body(body, m); return m; diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp index ea7d3a0..dc06334 100644 --- a/src/gallium/state_trackers/clover/util/compat.hpp +++ b/src/gallium/state_trackers/clover/util/compat.hpp @@ -307,110 +307,6 @@ namespace clover { size_t offset; }; - class string { - public: - typedef char *iterator; - typedef const char *const_iterator; - typedef char value_type; - typedef char &reference; - typedef const char &const_reference; - typedef std::ptrdiff_t difference_type; - typedef std::size_t size_type; - - string() : v() { - } - - string(const char *p) : v(p, std::strlen(p)) { - } - - template - string(const C &v) : v(v) { - } - - operator std::string() const { - return std::string(v.begin(), v.end()); - } - - bool - operator==(const 
string &s) const { - return this->v == s.v; - } - - void - reserve(size_type n) { - v.reserve(n); - } - - void - resize(size_type n, char x = char()) { - v.resize(n, x); - } - - void - push_back(char x) { - v.push_back(x); - } - - size_type - size() const { - return v.size(); - } - - size_type - capacity() const { - return v.capacity(); - } - - iterator - begin() { - return v.begin(); - } - - const_iterator - begin() const { - return v.begin(); - } - - iterator - end() { - return v.end(); - } - - const_iterator - end() const { - return v.end(); - } - - reference - operator[](size_type i) { - return v[i]; - } - - const_reference - operator[](size_type i) const { - return v[i]; - } - - const char * - c_str() const { - v.reserve(size() + 1); - *v.end() = 0; - return v.begin(); - } - - const char * - find(const string &s) const { - for (size_t i = 0; i + s.size() < size(); ++i) { - if (!std::memcmp(begin() + i, s.begin(), s.size())) - return begin() + i; - } - - return end(); - } - - private: - mutable vector v; - }; } } From tstellar at kemper.freedesktop.org Wed Apr 29 13:52:42 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Wed, 29 Apr 2015 06:52:42 -0700 (PDT) Subject: Mesa (master): clover: make module::symbol::name a string Message-ID: <20150429135242.947C0761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2d112ed96152bc62e5417472270f29966c8feece URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d112ed96152bc62e5417472270f29966c8feece Author: EdB Date: Fri Apr 24 12:59:56 2015 +0200 clover: make module::symbol::name a string Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/api/program.cpp | 3 +-- src/gallium/state_trackers/clover/core/module.cpp | 21 +++++++++++++++++++++ src/gallium/state_trackers/clover/core/module.hpp | 4 ++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
b/src/gallium/state_trackers/clover/api/program.cpp index b3be2b8..913d195 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -293,8 +293,7 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param, case CL_PROGRAM_KERNEL_NAMES: buf.as_string() = fold([](const std::string &a, const module::symbol &s) { - return ((a.empty() ? "" : a + ";") + - std::string(s.name.begin(), s.name.size())); + return ((a.empty() ? "" : a + ";") + s.name); }, std::string(), prog.symbols()); break; diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index be10e35..f098b05 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -133,6 +133,27 @@ namespace { } }; + /// (De)serialize a string. + template<> + struct _serializer { + static void + proc(compat::ostream &os, const std::string &s) { + _proc(os, s.size()); + os.write(&s[0], s.size() * sizeof(std::string::value_type)); + } + + static void + proc(compat::istream &is, std::string &s) { + s.resize(_proc(is)); + is.read(&s[0], s.size() * sizeof(std::string::value_type)); + } + + static void + proc(module::size_t &sz, const std::string &s) { + sz += sizeof(uint32_t) + sizeof(std::string::value_type) * s.size(); + } + }; + /// (De)serialize a module::section. 
template<> struct _serializer { diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp index ee6caf9..46112a3 100644 --- a/src/gallium/state_trackers/clover/core/module.hpp +++ b/src/gallium/state_trackers/clover/core/module.hpp @@ -100,12 +100,12 @@ namespace clover { }; struct symbol { - symbol(const compat::vector &name, resource_id section, + symbol(const std::string &name, resource_id section, size_t offset, const compat::vector &args) : name(name), section(section), offset(offset), args(args) { } symbol() : name(), section(0), offset(0), args() { } - compat::vector name; + std::string name; resource_id section; size_t offset; compat::vector args; From evelikov at kemper.freedesktop.org Wed Apr 29 14:12:08 2015 From: evelikov at kemper.freedesktop.org (Emil Velikov) Date: Wed, 29 Apr 2015 07:12:08 -0700 (PDT) Subject: Mesa (master): r300: do not link against libdrm_intel Message-ID: <20150429141208.21085761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b124dc2b70a1ba546d1ce46578036d263a4287fe URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b124dc2b70a1ba546d1ce46578036d263a4287fe Author: Emil Velikov Date: Wed Apr 15 14:44:02 2015 +0100 r300: do not link against libdrm_intel Accidentally added since the introduction of the file. 
Cc: "10.4 10.5" Signed-off-by: Emil Velikov Reviewed-by: Marek Olšák --- src/gallium/drivers/r300/Automake.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/r300/Automake.inc b/src/gallium/drivers/r300/Automake.inc index 9334973..d4ddc40 100644 --- a/src/gallium/drivers/r300/Automake.inc +++ b/src/gallium/drivers/r300/Automake.inc @@ -5,7 +5,7 @@ TARGET_CPPFLAGS += -DGALLIUM_R300 TARGET_LIB_DEPS += \ $(top_builddir)/src/gallium/drivers/r300/libr300.la \ $(RADEON_LIBS) \ - $(INTEL_LIBS) + $(LIBDRM_LIBS) TARGET_RADEON_WINSYS = \ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la From nroberts at kemper.freedesktop.org Wed Apr 29 14:40:57 2015 From: nroberts at kemper.freedesktop.org (Neil Roberts) Date: Wed, 29 Apr 2015 07:40:57 -0700 (PDT) Subject: Mesa (master): i965: Don't try to apply the opt_sampler_eot extension for vs Message-ID: <20150429144057.48962761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 5d4f085a43ccd1122301421f2013e42a3f0a7604 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d4f085a43ccd1122301421f2013e42a3f0a7604 Author: Neil Roberts Date: Tue Apr 28 14:20:06 2015 +0100 i965: Don't try to apply the opt_sampler_eot extension for vs The opt_sampler_eot optimisation of fs_visitor effectively assumes that it is running on a fragment shader because it casts the program key to a brw_wm_prog_key. However on Skylake fs_visitor can also be used for vertex shaders. It looks like this usually works anyway because the optimisation is skipped if key->nr_color_regions != 1. However for a vertex shader the key is actually a brw_vs_prog_key so the space for nr_color_regions is probably taken up by key->base.program_string_id. This can end up making nr_color_regions be 1 in which case the function will later assert when the last instruction is not FS_OPCODE_FB_WRITE. This was making the DEQP test suite assert.
Presumably this only happens there because that compiles a lot of shaders so it would end up with a high value for program_string_id. Reviewed-by: Kristian Høgsberg Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 61ee056..255ddf4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2548,6 +2548,9 @@ fs_visitor::opt_sampler_eot() { brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + if (stage != MESA_SHADER_FRAGMENT) + return false; + if (devinfo->gen < 9 && !devinfo->is_cherryview) return false; From tstellar at kemper.freedesktop.org Wed Apr 29 15:31:51 2015 From: tstellar at kemper.freedesktop.org (Tom Stellard) Date: Wed, 29 Apr 2015 08:31:51 -0700 (PDT) Subject: Mesa (master): clover: remove util/compat Message-ID: <20150429153151.68ACF761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d8f817ae7f4241a9ea23140805aaeb724a0ac851 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8f817ae7f4241a9ea23140805aaeb724a0ac851 Author: EdB Date: Thu Apr 23 20:13:51 2015 +0200 clover: remove util/compat Acked-by: Francisco Jerez Reviewed-by: Tom Stellard --- src/gallium/state_trackers/clover/Makefile.sources | 1 - src/gallium/state_trackers/clover/api/program.cpp | 14 +- .../state_trackers/clover/core/compiler.hpp | 4 +- src/gallium/state_trackers/clover/core/error.hpp | 2 - src/gallium/state_trackers/clover/core/kernel.cpp | 2 +- src/gallium/state_trackers/clover/core/module.cpp | 39 +-- src/gallium/state_trackers/clover/core/module.hpp | 19 +- src/gallium/state_trackers/clover/core/program.cpp | 2 +- src/gallium/state_trackers/clover/core/program.hpp | 2 +- .../state_trackers/clover/llvm/invocation.cpp | 20 +- .../state_trackers/clover/tgsi/compiler.cpp | 5 +- src/gallium/state_trackers/clover/util/compat.hpp | 313 -------------------- 12 files changed,
56 insertions(+), 367 deletions(-) diff --git a/src/gallium/state_trackers/clover/Makefile.sources b/src/gallium/state_trackers/clover/Makefile.sources index 03eb754..fa96774 100644 --- a/src/gallium/state_trackers/clover/Makefile.sources +++ b/src/gallium/state_trackers/clover/Makefile.sources @@ -45,7 +45,6 @@ CPP_SOURCES := \ util/adaptor.hpp \ util/algebra.hpp \ util/algorithm.hpp \ - util/compat.hpp \ util/factor.hpp \ util/functional.hpp \ util/lazy.hpp \ diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index 913d195..e9b1f38 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -23,6 +23,8 @@ #include "api/util.hpp" #include "core/program.hpp" +#include + using namespace clover; namespace { @@ -94,12 +96,12 @@ clCreateProgramWithBinary(cl_context d_ctx, cl_uint n, return { CL_INVALID_VALUE, {} }; try { - compat::istream::buffer_t bin(p, l); - compat::istream s(bin); + std::stringbuf bin( { (char*)p, l } ); + std::istream s(&bin); return { CL_SUCCESS, module::deserialize(s) }; - } catch (compat::istream::error &e) { + } catch (std::istream::failure &e) { return { CL_INVALID_BINARY, {} }; } }, @@ -279,10 +281,10 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param, case CL_PROGRAM_BINARIES: buf.as_matrix() = map([&](const device &dev) { - compat::ostream::buffer_t bin; - compat::ostream s(bin); + std::stringbuf bin; + std::ostream s(&bin); prog.binary(dev).serialize(s); - return bin; + return bin.str(); }, prog.devices()); break; diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp index 62c0f47..c68aa39 100644 --- a/src/gallium/state_trackers/clover/core/compiler.hpp +++ b/src/gallium/state_trackers/clover/core/compiler.hpp @@ -23,14 +23,12 @@ #ifndef CLOVER_CORE_COMPILER_HPP #define CLOVER_CORE_COMPILER_HPP -#include "util/compat.hpp" #include 
"core/error.hpp" #include "core/module.hpp" #include "pipe/p_defines.h" namespace clover { - typedef compat::vector > header_map; + typedef std::vector > header_map; module compile_program_llvm(const std::string &source, const header_map &headers, diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp index 805a0ec..eb65d62 100644 --- a/src/gallium/state_trackers/clover/core/error.hpp +++ b/src/gallium/state_trackers/clover/core/error.hpp @@ -27,8 +27,6 @@ #include -#include "util/compat.hpp" - namespace clover { class command_queue; class context; diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 442762c..0756f06 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -192,7 +192,7 @@ kernel::exec_context::bind(intrusive_ptr _q, if (st) _q->pipe->delete_compute_state(_q->pipe, st); - cs.prog = msec.data.begin(); + cs.prog = &(msec.data[0]); cs.req_local_mem = mem_local; cs.req_input_mem = input.size(); st = q->pipe->create_compute_state(q->pipe, &cs); diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp index f098b05..a6c5b98 100644 --- a/src/gallium/state_trackers/clover/core/module.cpp +++ b/src/gallium/state_trackers/clover/core/module.cpp @@ -21,6 +21,7 @@ // #include +#include #include "core/module.hpp" @@ -33,20 +34,20 @@ namespace { /// Serialize the specified object. template void - _proc(compat::ostream &os, const T &x) { + _proc(std::ostream &os, const T &x) { _serializer::proc(os, x); } /// Deserialize the specified object. 
template void - _proc(compat::istream &is, T &x) { + _proc(std::istream &is, T &x) { _serializer::proc(is, x); } template T - _proc(compat::istream &is) { + _proc(std::istream &is) { T x; _serializer::proc(is, x); return x; @@ -64,12 +65,12 @@ namespace { struct _serializer::value>::type> { static void - proc(compat::ostream &os, const T &x) { + proc(std::ostream &os, const T &x) { os.write(reinterpret_cast(&x), sizeof(x)); } static void - proc(compat::istream &is, T &x) { + proc(std::istream &is, T &x) { is.read(reinterpret_cast(&x), sizeof(x)); } @@ -81,11 +82,11 @@ namespace { /// (De)serialize a vector. template - struct _serializer, + struct _serializer, typename std::enable_if< !std::is_scalar::value>::type> { static void - proc(compat::ostream &os, const compat::vector &v) { + proc(std::ostream &os, const std::vector &v) { _proc(os, v.size()); for (size_t i = 0; i < v.size(); i++) @@ -93,7 +94,7 @@ namespace { } static void - proc(compat::istream &is, compat::vector &v) { + proc(std::istream &is, std::vector &v) { v.resize(_proc(is)); for (size_t i = 0; i < v.size(); i++) @@ -101,7 +102,7 @@ namespace { } static void - proc(module::size_t &sz, const compat::vector &v) { + proc(module::size_t &sz, const std::vector &v) { sz += sizeof(uint32_t); for (size_t i = 0; i < v.size(); i++) @@ -110,25 +111,25 @@ namespace { }; template - struct _serializer, + struct _serializer, typename std::enable_if< std::is_scalar::value>::type> { static void - proc(compat::ostream &os, const compat::vector &v) { + proc(std::ostream &os, const std::vector &v) { _proc(os, v.size()); - os.write(reinterpret_cast(v.begin()), + os.write(reinterpret_cast(&v[0]), v.size() * sizeof(T)); } static void - proc(compat::istream &is, compat::vector &v) { + proc(std::istream &is, std::vector &v) { v.resize(_proc(is)); - is.read(reinterpret_cast(v.begin()), + is.read(reinterpret_cast(&v[0]), v.size() * sizeof(T)); } static void - proc(module::size_t &sz, const compat::vector &v) { + 
proc(module::size_t &sz, const std::vector &v) { sz += sizeof(uint32_t) + sizeof(T) * v.size(); } }; @@ -137,13 +138,13 @@ namespace { template<> struct _serializer { static void - proc(compat::ostream &os, const std::string &s) { + proc(std::ostream &os, const std::string &s) { _proc(os, s.size()); os.write(&s[0], s.size() * sizeof(std::string::value_type)); } static void - proc(compat::istream &is, std::string &s) { + proc(std::istream &is, std::string &s) { s.resize(_proc(is)); is.read(&s[0], s.size() * sizeof(std::string::value_type)); } @@ -209,12 +210,12 @@ namespace { namespace clover { void - module::serialize(compat::ostream &os) const { + module::serialize(std::ostream &os) const { _proc(os, *this); } module - module::deserialize(compat::istream &is) { + module::deserialize(std::istream &is) { return _proc(is); } diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp index 46112a3..9d65688 100644 --- a/src/gallium/state_trackers/clover/core/module.hpp +++ b/src/gallium/state_trackers/clover/core/module.hpp @@ -23,7 +23,8 @@ #ifndef CLOVER_CORE_MODULE_HPP #define CLOVER_CORE_MODULE_HPP -#include "util/compat.hpp" +#include +#include namespace clover { struct module { @@ -40,14 +41,14 @@ namespace clover { }; section(resource_id id, enum type type, size_t size, - const compat::vector &data) : + const std::vector &data) : id(id), type(type), size(size), data(data) { } section() : id(0), type(text), size(0), data() { } resource_id id; type type; size_t size; - compat::vector data; + std::vector data; }; struct argument { @@ -101,22 +102,22 @@ namespace clover { struct symbol { symbol(const std::string &name, resource_id section, - size_t offset, const compat::vector &args) : + size_t offset, const std::vector &args) : name(name), section(section), offset(offset), args(args) { } symbol() : name(), section(0), offset(0), args() { } std::string name; resource_id section; size_t offset; - compat::vector 
args; + std::vector args; }; - void serialize(compat::ostream &os) const; - static module deserialize(compat::istream &is); + void serialize(std::ostream &os) const; + static module deserialize(std::istream &is); size_t size() const; - compat::vector syms; - compat::vector
      secs; + std::vector syms; + std::vector
      secs; }; } diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp index 50ac01b..0d6cc40 100644 --- a/src/gallium/state_trackers/clover/core/program.cpp +++ b/src/gallium/state_trackers/clover/core/program.cpp @@ -106,7 +106,7 @@ program::build_log(const device &dev) const { return _logs.count(&dev) ? _logs.find(&dev)->second : ""; } -const compat::vector & +const std::vector & program::symbols() const { if (_binaries.empty()) throw error(CL_INVALID_PROGRAM_EXECUTABLE); diff --git a/src/gallium/state_trackers/clover/core/program.hpp b/src/gallium/state_trackers/clover/core/program.hpp index 661fa03..183145e 100644 --- a/src/gallium/state_trackers/clover/core/program.hpp +++ b/src/gallium/state_trackers/clover/core/program.hpp @@ -60,7 +60,7 @@ namespace clover { std::string build_opts(const device &dev) const; std::string build_log(const device &dev) const; - const compat::vector &symbols() const; + const std::vector &symbols() const; unsigned kernel_ref_count() const; diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 2157909..7d2d941 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -340,11 +340,11 @@ namespace { PM.run(*mod); } - compat::vector + std::vector get_kernel_args(const llvm::Module *mod, const std::string &kernel_name, const clang::LangAS::Map &address_spaces) { - compat::vector args; + std::vector args; llvm::Function *kernel_func = mod->getFunction(kernel_name); llvm::DataLayout TD(mod); @@ -449,15 +449,16 @@ namespace { for (unsigned i = 0; i < kernels.size(); ++i) { std::string kernel_name = kernels[i]->getName(); - compat::vector args = + std::vector args = get_kernel_args(mod, kernel_name, address_spaces); m.syms.push_back(module::symbol(kernel_name, 0, i, args )); } header.num_bytes = llvm_bitcode.size(); - std::string 
data; - data.insert(0, (char*)(&header), sizeof(header)); + std::vector data; + data.insert(data.end(), (char*)(&header), + (char*)(&header) + sizeof(header)); data.insert(data.end(), llvm_bitcode.begin(), llvm_bitcode.end()); m.secs.push_back(module::section(0, module::section::text, @@ -622,15 +623,16 @@ namespace { // Store the generated ELF binary in the module's text section. header.num_bytes = code.size(); - std::string data; - data.append((char*)(&header), sizeof(header)); - data.append(code.begin(), code.end()); + std::vector data; + data.insert(data.end(), (char*)(&header), + (char*)(&header) + sizeof(header)); + data.insert(data.end(), code.begin(), code.end()); m.secs.push_back(module::section(0, module::section::text, header.num_bytes, data)); for (std::map::iterator i = kernel_offsets.begin(), e = kernel_offsets.end(); i != e; ++i) { - compat::vector args = + std::vector args = get_kernel_args(mod, i->first, address_spaces); m.syms.push_back(module::symbol(i->first, 0, i->second, args )); } diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp b/src/gallium/state_trackers/clover/tgsi/compiler.cpp index 9e959f7..b70104e 100644 --- a/src/gallium/state_trackers/clover/tgsi/compiler.cpp +++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp @@ -40,7 +40,7 @@ namespace { std::istringstream ts(line); std::string name, tok; module::size_t offset; - compat::vector args; + std::vector args; if (!(ts >> name)) continue; @@ -83,7 +83,8 @@ namespace { throw build_error("translate failed"); unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token); - m.secs.push_back({ 0, module::section::text, sz, { (char *)prog, sz } }); + std::vector data( (char *)prog, (char *)prog + sz ); + m.secs.push_back({ 0, module::section::text, sz, data }); } } diff --git a/src/gallium/state_trackers/clover/util/compat.hpp b/src/gallium/state_trackers/clover/util/compat.hpp deleted file mode 100644 index dc06334..0000000 --- 
a/src/gallium/state_trackers/clover/util/compat.hpp +++ /dev/null @@ -1,313 +0,0 @@ -// -// Copyright 2012 Francisco Jerez -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// - -#ifndef CLOVER_UTIL_COMPAT_HPP -#define CLOVER_UTIL_COMPAT_HPP - -#include -#include -#include -#include -#include - -namespace clover { - namespace compat { - // XXX - For cases where we can't rely on STL... I.e. the - // interface between code compiled as C++98 and C++11 - // source. Get rid of this as soon as everything can be - // compiled as C++11. 
- - namespace detail { - template - bool - ranges_equal(const R &a, const S &b) { - if (a.size() != b.size()) - return false; - - for (size_t i = 0; i < a.size(); ++i) - if (a[i] != b[i]) - return false; - - return true; - } - } - - template - class vector { - protected: - static T * - alloc(int n, const T *q, int m) { - T *p = reinterpret_cast(std::malloc(n * sizeof(T))); - - for (int i = 0; i < m; ++i) - new(&p[i]) T(q[i]); - - return p; - } - - static void - free(int n, T *p) { - for (int i = 0; i < n; ++i) - p[i].~T(); - - std::free(p); - } - - public: - typedef T *iterator; - typedef const T *const_iterator; - typedef T value_type; - typedef T &reference; - typedef const T &const_reference; - typedef std::ptrdiff_t difference_type; - typedef std::size_t size_type; - - vector() : p(NULL), _size(0), _capacity(0) { - } - - vector(const vector &v) : - p(alloc(v._size, v.p, v._size)), - _size(v._size), _capacity(v._size) { - } - - vector(const_iterator p, size_type n) : - p(alloc(n, p, n)), _size(n), _capacity(n) { - } - - template - vector(const C &v) : - p(alloc(v.size(), NULL, 0)), _size(0), - _capacity(v.size()) { - for (typename C::const_iterator it = v.begin(); it != v.end(); ++it) - new(&p[_size++]) T(*it); - } - - ~vector() { - free(_size, p); - } - - vector & - operator=(const vector &v) { - free(_size, p); - - p = alloc(v._size, v.p, v._size); - _size = v._size; - _capacity = v._size; - - return *this; - } - - bool - operator==(const vector &v) const { - return detail::ranges_equal(*this, v); - } - - void - reserve(size_type n) { - if (_capacity < n) { - T *q = alloc(n, p, _size); - free(_size, p); - - p = q; - _capacity = n; - } - } - - void - resize(size_type n, T x = T()) { - if (n <= _size) { - for (size_type i = n; i < _size; ++i) - p[i].~T(); - - } else { - reserve(n); - - for (size_type i = _size; i < n; ++i) - new(&p[i]) T(x); - } - - _size = n; - } - - void - push_back(const T &x) { - reserve(_size + 1); - new(&p[_size]) T(x); - ++_size; - } - - 
size_type - size() const { - return _size; - } - - size_type - capacity() const { - return _capacity; - } - - iterator - begin() { - return p; - } - - const_iterator - begin() const { - return p; - } - - iterator - end() { - return p + _size; - } - - const_iterator - end() const { - return p + _size; - } - - reference - operator[](size_type i) { - return p[i]; - } - - const_reference - operator[](size_type i) const { - return p[i]; - } - - private: - iterator p; - size_type _size; - size_type _capacity; - }; - - template - class vector_ref { - public: - typedef T *iterator; - typedef const T *const_iterator; - typedef T value_type; - typedef T &reference; - typedef const T &const_reference; - typedef std::ptrdiff_t difference_type; - typedef std::size_t size_type; - - vector_ref(iterator p, size_type n) : p(p), n(n) { - } - - template - vector_ref(C &v) : p(&*v.begin()), n(v.size()) { - } - - bool - operator==(const vector_ref &v) const { - return detail::ranges_equal(*this, v); - } - - size_type - size() const { - return n; - } - - iterator - begin() { - return p; - } - - const_iterator - begin() const { - return p; - } - - iterator - end() { - return p + n; - } - - const_iterator - end() const { - return p + n; - } - - reference - operator[](int i) { - return p[i]; - } - - const_reference - operator[](int i) const { - return p[i]; - } - - private: - iterator p; - size_type n; - }; - - class istream { - public: - typedef vector_ref buffer_t; - - class error { - public: - virtual ~error() {} - }; - - istream(const buffer_t &buf) : buf(buf), offset(0) {} - - void - read(char *p, size_t n) { - if (offset + n > buf.size()) - throw error(); - - std::memcpy(p, buf.begin() + offset, n); - offset += n; - } - - private: - const buffer_t &buf; - size_t offset; - }; - - class ostream { - public: - typedef vector buffer_t; - - ostream(buffer_t &buf) : buf(buf), offset(buf.size()) {} - - void - write(const char *p, size_t n) { - buf.resize(offset + n); - 
std::memcpy(buf.begin() + offset, p, n); - offset += n; - } - - private: - buffer_t &buf; - size_t offset; - }; - - } -} - -#endif From mareko at kemper.freedesktop.org Wed Apr 29 20:05:24 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Wed, 29 Apr 2015 13:05:24 -0700 (PDT) Subject: Mesa (master): winsys/radeon: move radeon_winsys.h to drivers/radeon Message-ID: <20150429200524.1375B761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dcfbc006b6b07d41338b87c64cdc01c36608087b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dcfbc006b6b07d41338b87c64cdc01c36608087b Author: Marek Olšák Date: Thu Apr 16 22:50:33 2015 +0200 winsys/radeon: move radeon_winsys.h to drivers/radeon --- src/gallium/auxiliary/target-helpers/inline_drm_helper.h | 6 +++--- src/gallium/drivers/r300/r300_chipset.c | 2 +- src/gallium/drivers/r300/r300_context.h | 1 - src/gallium/drivers/r300/r300_screen.h | 2 +- src/gallium/drivers/radeon/Makefile.sources | 3 ++- src/gallium/drivers/radeon/r600_pipe_common.h | 2 +- src/gallium/drivers/radeon/radeon_uvd.c | 1 - src/gallium/drivers/radeon/radeon_uvd.h | 2 +- src/gallium/drivers/radeon/radeon_vce.c | 1 - src/gallium/drivers/radeon/radeon_vce_40_2_2.c | 1 - src/gallium/drivers/radeon/radeon_video.c | 1 - src/gallium/drivers/radeon/radeon_video.h | 2 +- .../{winsys/radeon/drm => drivers/radeon}/radeon_winsys.h | 0 src/gallium/drivers/radeonsi/si_pm4.h | 2 +- src/gallium/targets/pipe-loader/pipe_r300.c | 2 +- src/gallium/targets/pipe-loader/pipe_r600.c | 2 +- src/gallium/targets/pipe-loader/pipe_radeonsi.c | 2 +- src/gallium/winsys/radeon/drm/Makefile.sources | 3 +-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 +- 19 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h index 542ad43..d3c331d 100644 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++
b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h @@ -28,19 +28,19 @@ #endif #if GALLIUM_R300 -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "radeon/drm/radeon_drm_public.h" #include "r300/r300_public.h" #endif #if GALLIUM_R600 -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "radeon/drm/radeon_drm_public.h" #include "r600/r600_public.h" #endif #if GALLIUM_RADEONSI -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "radeon/drm/radeon_drm_public.h" #include "radeonsi/si_public.h" #endif diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 7a83611..c1c7ce3 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -22,7 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_chipset.h" -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "util/u_debug.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 4d2b153..3873c9a 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -36,7 +36,6 @@ #include "r300_defines.h" #include "r300_screen.h" #include "compiler/radeon_regalloc.h" -#include "radeon/drm/radeon_winsys.h" struct u_upload_mgr; struct r300_context; diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index f0dd3c6..7bba39b 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -25,7 +25,7 @@ #define R300_SCREEN_H #include "r300_chipset.h" -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "pipe/p_screen.h" #include "util/u_slab.h" #include "os/os_thread.h" diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 469f6d1..c655fe5 
100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -15,7 +15,8 @@ C_SOURCES := \ radeon_vce.c \ radeon_vce.h \ radeon_video.c \ - radeon_video.h + radeon_video.h \ + radeon_winsys.h LLVM_C_FILES := \ radeon_elf_util.c \ diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index faa6e0d..f1c9503 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -34,7 +34,7 @@ #include -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "util/u_blitter.h" #include "util/u_double_list.h" diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 9668d7d..4d4b54b 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -45,7 +45,6 @@ #include "vl/vl_defines.h" #include "vl/vl_mpeg12_decoder.h" -#include "radeon/drm/radeon_winsys.h" #include "r600_pipe_common.h" #include "radeon_video.h" #include "radeon_uvd.h" diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h index 462b101..41a6fb4 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.h +++ b/src/gallium/drivers/radeon/radeon_uvd.h @@ -34,7 +34,7 @@ #ifndef RADEON_UVD_H #define RADEON_UVD_H -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "vl/vl_video_buffer.h" /* UVD uses PM4 packet type 0 and 2 */ diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 6d34bd3..5f710e6 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -40,7 +40,6 @@ #include "vl/vl_video_buffer.h" -#include "radeon/drm/radeon_winsys.h" #include "r600_pipe_common.h" #include "radeon_video.h" #include "radeon_vce.h" diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c 
b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c index b176aa7..0902957 100644 --- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c +++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c @@ -40,7 +40,6 @@ #include "vl/vl_video_buffer.h" -#include "radeon/drm/radeon_winsys.h" #include "r600_pipe_common.h" #include "radeon_video.h" #include "radeon_vce.h" diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 1420798..6ec10c1 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -39,7 +39,6 @@ #include "vl/vl_defines.h" #include "vl/vl_video_buffer.h" -#include "radeon/drm/radeon_winsys.h" #include "r600_pipe_common.h" #include "radeon_video.h" #include "radeon_vce.h" diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h index 974ea4f..6d0ff28 100644 --- a/src/gallium/drivers/radeon/radeon_video.h +++ b/src/gallium/drivers/radeon/radeon_video.h @@ -34,7 +34,7 @@ #ifndef RADEON_VIDEO_H #define RADEON_VIDEO_H -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "vl/vl_video_buffer.h" #define RVID_ERR(fmt, args...) 
\ diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h similarity index 100% rename from src/gallium/winsys/radeon/drm/radeon_winsys.h rename to src/gallium/drivers/radeon/radeon_winsys.h diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index bfb5562..d215882 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -27,7 +27,7 @@ #ifndef SI_PM4_H #define SI_PM4_H -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #define SI_PM4_MAX_DW 256 #define SI_PM4_MAX_BO 32 diff --git a/src/gallium/targets/pipe-loader/pipe_r300.c b/src/gallium/targets/pipe-loader/pipe_r300.c index abcade4..368b8c2 100644 --- a/src/gallium/targets/pipe-loader/pipe_r300.c +++ b/src/gallium/targets/pipe-loader/pipe_r300.c @@ -1,7 +1,7 @@ #include "target-helpers/inline_debug_helper.h" #include "state_tracker/drm_driver.h" #include "radeon/drm/radeon_drm_public.h" -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "r300/r300_public.h" static struct pipe_screen * diff --git a/src/gallium/targets/pipe-loader/pipe_r600.c b/src/gallium/targets/pipe-loader/pipe_r600.c index eb53637..65b11c8 100644 --- a/src/gallium/targets/pipe-loader/pipe_r600.c +++ b/src/gallium/targets/pipe-loader/pipe_r600.c @@ -1,7 +1,7 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" #include "radeon/drm/radeon_drm_public.h" -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "r600/r600_public.h" static struct pipe_screen * diff --git a/src/gallium/targets/pipe-loader/pipe_radeonsi.c b/src/gallium/targets/pipe-loader/pipe_radeonsi.c index 1dcd781..5457b5b 100644 --- a/src/gallium/targets/pipe-loader/pipe_radeonsi.c +++ b/src/gallium/targets/pipe-loader/pipe_radeonsi.c @@ -1,7 +1,7 @@ #include "state_tracker/drm_driver.h" #include "target-helpers/inline_debug_helper.h" 
#include "radeon/drm/radeon_drm_public.h" -#include "radeon/drm/radeon_winsys.h" +#include "radeon/radeon_winsys.h" #include "radeonsi/si_public.h" static struct pipe_screen * diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources index d30969e..87a04fa 100644 --- a/src/gallium/winsys/radeon/drm/Makefile.sources +++ b/src/gallium/winsys/radeon/drm/Makefile.sources @@ -6,8 +6,7 @@ C_SOURCES := \ radeon_drm_cs.h \ radeon_drm_public.h \ radeon_drm_winsys.c \ - radeon_drm_winsys.h \ - radeon_winsys.h + radeon_drm_winsys.h TOOLS_HDR := \ radeon_ctx.h diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index 5711ffa..ea475c8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -30,7 +30,7 @@ #ifndef RADEON_DRM_WINSYS_H #define RADEON_DRM_WINSYS_H -#include "radeon_winsys.h" +#include "gallium/drivers/radeon/radeon_winsys.h" #include "os/os_thread.h" #include From mareko at kemper.freedesktop.org Wed Apr 29 20:05:24 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Wed, 29 Apr 2015 13:05:24 -0700 (PDT) Subject: Mesa (master): winsys/radeon: add a private interface for radeon_surface Message-ID: <20150429200524.1EC37761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a582b22c6382f24d921e9fe8a24917100c1396f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a582b22c6382f24d921e9fe8a24917100c1396f1 Author: Marek Olšák Date: Thu Apr 16 22:53:04 2015 +0200 winsys/radeon: add a private interface for radeon_surface --- src/gallium/drivers/r600/evergreen_state.c | 6 +- src/gallium/drivers/r600/r600_uvd.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.h | 2 +- src/gallium/drivers/radeon/r600_texture.c | 12 +- src/gallium/drivers/radeon/radeon_uvd.c | 6 +- src/gallium/drivers/radeon/radeon_uvd.h | 4 +- src/gallium/drivers/radeon/radeon_vce.c | 2 +-
src/gallium/drivers/radeon/radeon_vce.h | 6 +- src/gallium/drivers/radeon/radeon_video.c | 2 +- src/gallium/drivers/radeon/radeon_video.h | 2 +- src/gallium/drivers/radeon/radeon_winsys.h | 79 ++++++++- src/gallium/drivers/radeonsi/si_state.c | 4 +- src/gallium/drivers/radeonsi/si_uvd.c | 4 +- src/gallium/winsys/radeon/drm/Makefile.sources | 1 + src/gallium/winsys/radeon/drm/radeon_drm_surface.c | 180 ++++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 20 +-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 1 + 17 files changed, 286 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 266e372..4ddbc0b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -664,7 +664,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx, unsigned height, depth, width; unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh; enum pipe_format pipe_format = state->format; - struct radeon_surface_level *surflevel; + struct radeon_surf_level *surflevel; unsigned base_level, first_level, last_level; uint64_t va; @@ -918,7 +918,7 @@ static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_ /** * This function intializes the CB* register values for RATs. It is meant * to be used for 1D aligned buffers that do not have an associated - * radeon_surface. + * radeon_surf. 
*/ void evergreen_init_color_surface_rat(struct r600_context *rctx, struct r600_surface *surf) @@ -1163,7 +1163,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx, struct r600_screen *rscreen = rctx->screen; struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; unsigned level = surf->base.u.tex.level; - struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; + struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; uint64_t offset; unsigned format, array_mode; unsigned macro_aspect, tile_split, bankh, bankw, nbanks; diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c index ee5288f..357e901 100644 --- a/src/gallium/drivers/r600/r600_uvd.c +++ b/src/gallium/drivers/r600/r600_uvd.c @@ -57,7 +57,7 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, { struct r600_context *ctx = (struct r600_context *)pipe; struct r600_texture *resources[VL_NUM_COMPONENTS] = {}; - struct radeon_surface* surfaces[VL_NUM_COMPONENTS] = {}; + struct radeon_surf* surfaces[VL_NUM_COMPONENTS] = {}; struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {}; const enum pipe_format *resource_formats; struct pipe_video_buffer template; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index f1c9503..8d885ab 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -199,7 +199,7 @@ struct r600_texture { unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ struct r600_texture *flushed_depth_texture; boolean is_flushing_texture; - struct radeon_surface surface; + struct radeon_surf surface; /* Colorbuffer compression and fast clear. 
*/ struct r600_fmask_info fmask; diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index ab8ce7b..dc510c9 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -119,7 +119,7 @@ static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned leve } static int r600_init_surface(struct r600_common_screen *rscreen, - struct radeon_surface *surface, + struct radeon_surf *surface, const struct pipe_resource *ptex, unsigned array_mode, bool is_flushed_depth) @@ -234,7 +234,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, { struct r600_texture *rtex = (struct r600_texture*)ptex; struct r600_resource *resource = &rtex->resource; - struct radeon_surface *surface = &rtex->surface; + struct radeon_surf *surface = &rtex->surface; struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; rscreen->ws->buffer_set_tiling(resource->buf, @@ -280,7 +280,7 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, struct r600_fmask_info *out) { /* FMASK is allocated like an ordinary texture. 
*/ - struct radeon_surface fmask = rtex->surface; + struct radeon_surf fmask = rtex->surface; memset(out, 0, sizeof(*out)); @@ -570,7 +570,7 @@ r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, unsigned pitch_in_bytes_override, struct pb_buffer *buf, - struct radeon_surface *surface) + struct radeon_surf *surface) { struct r600_texture *rtex; struct r600_resource *resource; @@ -764,7 +764,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_surface surface = {0}; + struct radeon_surf surface = {0}; int r; r = r600_init_surface(rscreen, &surface, templ, @@ -790,7 +790,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen unsigned stride = 0; unsigned array_mode; enum radeon_bo_layout micro, macro; - struct radeon_surface surface; + struct radeon_surf surface; bool scanout; int r; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 4d4b54b..be58d0b 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -870,7 +870,7 @@ error: } /* calculate top/bottom offset */ -static unsigned texture_offset(struct radeon_surface *surface, unsigned layer) +static unsigned texture_offset(struct radeon_surf *surface, unsigned layer) { return surface->level[0].offset + layer * surface->level[0].slice_size; @@ -905,8 +905,8 @@ static unsigned bank_wh(unsigned bankwh) /** * fill decoding target field from the luma and chroma surfaces */ -void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma, - struct radeon_surface *chroma) +void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, + struct radeon_surf *chroma) { msg->body.decode.dt_pitch = luma->level[0].pitch_bytes; switch (luma->level[0].mode) { diff --git 
a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h index 41a6fb4..7442865 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.h +++ b/src/gallium/drivers/radeon/radeon_uvd.h @@ -353,6 +353,6 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, ruvd_set_dtb set_dtb); /* fill decoding target field from the luma and chroma surfaces */ -void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surface *luma, - struct radeon_surface *chroma); +void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, + struct radeon_surf *chroma); #endif diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 5f710e6..e220f40 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -337,7 +337,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen; struct rvce_encoder *enc; struct pipe_video_buffer *tmp_buf, templat = {}; - struct radeon_surface *tmp_surf; + struct radeon_surf *tmp_surf; unsigned cpb_size; if (!rscreen->info.vce_fw_version) { diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h index 7f0cd1f..5c6317a 100644 --- a/src/gallium/drivers/radeon/radeon_vce.h +++ b/src/gallium/drivers/radeon/radeon_vce.h @@ -50,7 +50,7 @@ struct r600_common_screen; /* driver dependent callback */ typedef void (*rvce_get_buffer)(struct pipe_resource *resource, struct radeon_winsys_cs_handle **handle, - struct radeon_surface **surface); + struct radeon_surf **surface); /* Coded picture buffer slot */ struct rvce_cpb_slot { @@ -88,8 +88,8 @@ struct rvce_encoder { rvce_get_buffer get_buffer; struct radeon_winsys_cs_handle* handle; - struct radeon_surface* luma; - struct radeon_surface* chroma; + struct radeon_surf* luma; + struct radeon_surf* chroma; struct radeon_winsys_cs_handle* 
bs_handle; unsigned bs_size; diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index 6ec10c1..826e076 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -132,7 +132,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) */ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind, struct pb_buffer** buffers[VL_NUM_COMPONENTS], - struct radeon_surface *surfaces[VL_NUM_COMPONENTS]) + struct radeon_surf *surfaces[VL_NUM_COMPONENTS]) { unsigned best_tiling, best_wh, off; unsigned size, alignment; diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h index 6d0ff28..c9ee67f 100644 --- a/src/gallium/drivers/radeon/radeon_video.h +++ b/src/gallium/drivers/radeon/radeon_video.h @@ -68,7 +68,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) sumup their sizes and replace the backend buffers with a single bo */ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind, struct pb_buffer** buffers[VL_NUM_COMPONENTS], - struct radeon_surface *surfaces[VL_NUM_COMPONENTS]); + struct radeon_surf *surfaces[VL_NUM_COMPONENTS]); /* returns supported codecs and other parameters */ int rvid_get_video_param(struct pipe_screen *screen, diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index ee0a904..3bfbb6d 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -41,7 +41,6 @@ */ #include "pipebuffer/pb_buffer.h" -#include "radeon_surface.h" #define RADEON_MAX_CMDBUF_DWORDS (16 * 1024) @@ -246,6 +245,80 @@ enum radeon_feature_id { RADEON_FID_R300_CMASK_ACCESS, }; +#define RADEON_SURF_MAX_LEVEL 32 + +#define RADEON_SURF_TYPE_MASK 0xFF +#define RADEON_SURF_TYPE_SHIFT 0 +#define RADEON_SURF_TYPE_1D 0 +#define RADEON_SURF_TYPE_2D 1 +#define RADEON_SURF_TYPE_3D 2 
+#define RADEON_SURF_TYPE_CUBEMAP 3 +#define RADEON_SURF_TYPE_1D_ARRAY 4 +#define RADEON_SURF_TYPE_2D_ARRAY 5 +#define RADEON_SURF_MODE_MASK 0xFF +#define RADEON_SURF_MODE_SHIFT 8 +#define RADEON_SURF_MODE_LINEAR 0 +#define RADEON_SURF_MODE_LINEAR_ALIGNED 1 +#define RADEON_SURF_MODE_1D 2 +#define RADEON_SURF_MODE_2D 3 +#define RADEON_SURF_SCANOUT (1 << 16) +#define RADEON_SURF_ZBUFFER (1 << 17) +#define RADEON_SURF_SBUFFER (1 << 18) +#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER) +#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19) +#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) +#define RADEON_SURF_FMASK (1 << 21) + +#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) +#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) +#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT)) + +struct radeon_surf_level { + uint64_t offset; + uint64_t slice_size; + uint32_t npix_x; + uint32_t npix_y; + uint32_t npix_z; + uint32_t nblk_x; + uint32_t nblk_y; + uint32_t nblk_z; + uint32_t pitch_bytes; + uint32_t mode; +}; + +struct radeon_surf { + /* These are inputs to the calculator. */ + uint32_t npix_x; + uint32_t npix_y; + uint32_t npix_z; + uint32_t blk_w; + uint32_t blk_h; + uint32_t blk_d; + uint32_t array_size; + uint32_t last_level; + uint32_t bpe; + uint32_t nsamples; + uint32_t flags; + + /* These are return values. Some of them can be set by the caller, but + * they will be treated as hints (e.g. bankw, bankh) and might be + * changed by the calculator. + */ + uint64_t bo_size; + uint64_t bo_alignment; + /* This applies to EG and later. 
*/ + uint32_t bankw; + uint32_t bankh; + uint32_t mtilea; + uint32_t tile_split; + uint32_t stencil_tile_split; + uint64_t stencil_offset; + struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL]; + struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL]; + uint32_t tiling_index[RADEON_SURF_MAX_LEVEL]; + uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL]; +}; + struct radeon_winsys { /** * The screen object this winsys was created for @@ -576,7 +649,7 @@ struct radeon_winsys { * \param surf Surface structure ptr */ int (*surface_init)(struct radeon_winsys *ws, - struct radeon_surface *surf); + struct radeon_surf *surf); /** * Find best values for a surface @@ -585,7 +658,7 @@ struct radeon_winsys { * \param surf Surface structure ptr */ int (*surface_best)(struct radeon_winsys *ws, - struct radeon_surface *surf); + struct radeon_surf *surf); uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index dc19d29..7f0fdd5 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1852,7 +1852,7 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_screen *sscreen = sctx->screen; struct r600_texture *rtex = (struct r600_texture*)surf->base.texture; unsigned level = surf->base.u.tex.level; - struct radeon_surface_level *levelinfo = &rtex->surface.level[level]; + struct radeon_surf_level *levelinfo = &rtex->surface.level[level]; unsigned format, tile_mode_index, array_mode; unsigned macro_aspect, tile_split, stile_split, bankh, bankw, nbanks, pipe_config; uint32_t z_info, s_info, db_depth_info; @@ -2258,7 +2258,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx unsigned char state_swizzle[4], swizzle[4]; unsigned height, depth, width; enum pipe_format pipe_format = state->format; - struct radeon_surface_level *surflevel; + struct radeon_surf_level 
*surflevel; int first_non_void; uint64_t va; diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c index 0ba3b12..2f10f9b 100644 --- a/src/gallium/drivers/radeonsi/si_uvd.c +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -44,7 +44,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe, { struct si_context *ctx = (struct si_context *)pipe; struct r600_texture *resources[VL_NUM_COMPONENTS] = {}; - struct radeon_surface *surfaces[VL_NUM_COMPONENTS] = {}; + struct radeon_surf *surfaces[VL_NUM_COMPONENTS] = {}; struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {}; const enum pipe_format *resource_formats; struct pipe_video_buffer template; @@ -136,7 +136,7 @@ static struct radeon_winsys_cs_handle* si_uvd_set_dtb(struct ruvd_msg *msg, stru /* get the radeon resources for VCE */ static void si_vce_get_buffer(struct pipe_resource *resource, struct radeon_winsys_cs_handle **handle, - struct radeon_surface **surface) + struct radeon_surf **surface) { struct r600_texture *res = (struct r600_texture *)resource; diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources index 87a04fa..a00c84d 100644 --- a/src/gallium/winsys/radeon/drm/Makefile.sources +++ b/src/gallium/winsys/radeon/drm/Makefile.sources @@ -5,6 +5,7 @@ C_SOURCES := \ radeon_drm_cs_dump.c \ radeon_drm_cs.h \ radeon_drm_public.h \ + radeon_drm_surface.c \ radeon_drm_winsys.c \ radeon_drm_winsys.h diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c new file mode 100644 index 0000000..29d3467 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -0,0 +1,180 @@ +/* + * Copyright © 2014 Advanced Micro Devices, Inc. + * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * Authors: + * Marek Olšák + */ + +#include "radeon_drm_winsys.h" + +#include + +static void surf_level_winsys_to_drm(struct radeon_surface_level *level_drm, + const struct radeon_surf_level *level_ws) +{ + level_drm->offset = level_ws->offset; + level_drm->slice_size = level_ws->slice_size; + level_drm->npix_x = level_ws->npix_x; + level_drm->npix_y = level_ws->npix_y; + level_drm->npix_z = level_ws->npix_z; + level_drm->nblk_x = level_ws->nblk_x; + level_drm->nblk_y = level_ws->nblk_y; + level_drm->nblk_z = level_ws->nblk_z; + level_drm->pitch_bytes = level_ws->pitch_bytes; + level_drm->mode = level_ws->mode; +} + +static void surf_level_drm_to_winsys(struct radeon_surf_level *level_ws, + const struct radeon_surface_level *level_drm) +{ + level_ws->offset = level_drm->offset; + level_ws->slice_size = level_drm->slice_size; + level_ws->npix_x = level_drm->npix_x; + level_ws->npix_y = level_drm->npix_y; + level_ws->npix_z = level_drm->npix_z; + level_ws->nblk_x = level_drm->nblk_x; + level_ws->nblk_y = level_drm->nblk_y; + level_ws->nblk_z = level_drm->nblk_z; + level_ws->pitch_bytes = level_drm->pitch_bytes; + level_ws->mode = level_drm->mode; +} + +static void surf_winsys_to_drm(struct radeon_surface *surf_drm, + const struct radeon_surf *surf_ws) +{ + int i; + + memset(surf_drm, 0, sizeof(*surf_drm)); + + surf_drm->npix_x = surf_ws->npix_x; + surf_drm->npix_y = surf_ws->npix_y; + surf_drm->npix_z = surf_ws->npix_z; + surf_drm->blk_w = surf_ws->blk_w; + surf_drm->blk_h = surf_ws->blk_h; + surf_drm->blk_d = surf_ws->blk_d; + surf_drm->array_size = surf_ws->array_size; + surf_drm->last_level = surf_ws->last_level; + surf_drm->bpe = surf_ws->bpe; + surf_drm->nsamples = surf_ws->nsamples; + surf_drm->flags = surf_ws->flags; + + surf_drm->bo_size = surf_ws->bo_size; + surf_drm->bo_alignment = surf_ws->bo_alignment; + + surf_drm->bankw = surf_ws->bankw; + surf_drm->bankh = surf_ws->bankh; + surf_drm->mtilea = surf_ws->mtilea; + surf_drm->tile_split =
surf_ws->tile_split; + surf_drm->stencil_tile_split = surf_ws->stencil_tile_split; + surf_drm->stencil_offset = surf_ws->stencil_offset; + + for (i = 0; i < RADEON_SURF_MAX_LEVEL; i++) { + surf_level_winsys_to_drm(&surf_drm->level[i], &surf_ws->level[i]); + surf_level_winsys_to_drm(&surf_drm->stencil_level[i], + &surf_ws->stencil_level[i]); + + surf_drm->tiling_index[i] = surf_ws->tiling_index[i]; + surf_drm->stencil_tiling_index[i] = surf_ws->stencil_tiling_index[i]; + } +} + +static void surf_drm_to_winsys(struct radeon_surf *surf_ws, + const struct radeon_surface *surf_drm) +{ + int i; + + memset(surf_ws, 0, sizeof(*surf_ws)); + + surf_ws->npix_x = surf_drm->npix_x; + surf_ws->npix_y = surf_drm->npix_y; + surf_ws->npix_z = surf_drm->npix_z; + surf_ws->blk_w = surf_drm->blk_w; + surf_ws->blk_h = surf_drm->blk_h; + surf_ws->blk_d = surf_drm->blk_d; + surf_ws->array_size = surf_drm->array_size; + surf_ws->last_level = surf_drm->last_level; + surf_ws->bpe = surf_drm->bpe; + surf_ws->nsamples = surf_drm->nsamples; + surf_ws->flags = surf_drm->flags; + + surf_ws->bo_size = surf_drm->bo_size; + surf_ws->bo_alignment = surf_drm->bo_alignment; + + surf_ws->bankw = surf_drm->bankw; + surf_ws->bankh = surf_drm->bankh; + surf_ws->mtilea = surf_drm->mtilea; + surf_ws->tile_split = surf_drm->tile_split; + surf_ws->stencil_tile_split = surf_drm->stencil_tile_split; + surf_ws->stencil_offset = surf_drm->stencil_offset; + + for (i = 0; i < RADEON_SURF_MAX_LEVEL; i++) { + surf_level_drm_to_winsys(&surf_ws->level[i], &surf_drm->level[i]); + surf_level_drm_to_winsys(&surf_ws->stencil_level[i], + &surf_drm->stencil_level[i]); + + surf_ws->tiling_index[i] = surf_drm->tiling_index[i]; + surf_ws->stencil_tiling_index[i] = surf_drm->stencil_tiling_index[i]; + } +} + +static int radeon_winsys_surface_init(struct radeon_winsys *rws, + struct radeon_surf *surf_ws) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + struct radeon_surface surf_drm; + int r; + + 
surf_winsys_to_drm(&surf_drm, surf_ws); + + r = radeon_surface_init(ws->surf_man, &surf_drm); + if (r) + return r; + + surf_drm_to_winsys(surf_ws, &surf_drm); + return 0; +} + +static int radeon_winsys_surface_best(struct radeon_winsys *rws, + struct radeon_surf *surf_ws) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + struct radeon_surface surf_drm; + int r; + + surf_winsys_to_drm(&surf_drm, surf_ws); + + r = radeon_surface_best(ws->surf_man, &surf_drm); + if (r) + return r; + + surf_drm_to_winsys(surf_ws, &surf_drm); + return 0; +} + +void radeon_surface_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.surface_init = radeon_winsys_surface_init; + ws->base.surface_best = radeon_winsys_surface_best; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index a6f847f..ba8d143 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifndef RADEON_INFO_ACTIVE_CU_COUNT #define RADEON_INFO_ACTIVE_CU_COUNT 0x20 @@ -514,22 +515,6 @@ static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs, return FALSE; } -static int radeon_drm_winsys_surface_init(struct radeon_winsys *rws, - struct radeon_surface *surf) -{ - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - - return radeon_surface_init(ws->surf_man, surf); -} - -static int radeon_drm_winsys_surface_best(struct radeon_winsys *rws, - struct radeon_surface *surf) -{ - struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; - - return radeon_surface_best(ws->surf_man, surf); -} - static uint64_t radeon_query_value(struct radeon_winsys *rws, enum radeon_value_id value) { @@ -740,13 +725,12 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) ws->base.destroy = radeon_winsys_destroy; ws->base.query_info = radeon_query_info; ws->base.cs_request_feature = 
radeon_cs_request_feature; - ws->base.surface_init = radeon_drm_winsys_surface_init; - ws->base.surface_best = radeon_drm_winsys_surface_best; ws->base.query_value = radeon_query_value; ws->base.read_registers = radeon_read_registers; radeon_bomgr_init_functions(ws); radeon_drm_cs_init_functions(ws); + radeon_surface_init_functions(ws); pipe_mutex_init(ws->hyperz_owner_mutex); pipe_mutex_init(ws->cmask_owner_mutex); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index ea475c8..166b6b9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -103,5 +103,6 @@ radeon_drm_winsys(struct radeon_winsys *base) } void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs); +void radeon_surface_init_functions(struct radeon_drm_winsys *ws); #endif From idr at kemper.freedesktop.org Wed Apr 29 20:19:13 2015 From: idr at kemper.freedesktop.org (Ian Romanick) Date: Wed, 29 Apr 2015 13:19:13 -0700 (PDT) Subject: Mesa (master): glx: Massive update of comments in struct extension_info Message-ID: <20150429201913.BB756761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 2c7e289d8b1d3c63ab55b64ab3961067fd5a1985 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2c7e289d8b1d3c63ab55b64ab3961067fd5a1985 Author: Ian Romanick Date: Wed Apr 15 11:34:50 2015 -0700 glx: Massive update of comments in struct extension_info In response to another patch, Emil asked for some clarification how this stuff works. Rather than just reply to the e-mail, I decided to update the explanation in the code. 
Signed-off-by: Ian Romanick Cc: Emil Velikov --- src/glx/glxextensions.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/src/glx/glxextensions.c b/src/glx/glxextensions.c index a326f0d..cb8cd66 100644 --- a/src/glx/glxextensions.c +++ b/src/glx/glxextensions.c @@ -61,12 +61,73 @@ struct extension_info */ unsigned char version_major; unsigned char version_minor; + + /** + * The client (i.e., libGL) supports this extension. + * + * Except during bring up, all extensions should have this set to Y. There + * are a few cases of extensions that have partial (or speculative) + * support, but these are rare. There also shouldn't be any new ones + * added. + * + * Generally, extensions require server support and ::client_support to be + * enabled. If the display is capable of direct rendering, + * ::direct_support is also required. + * + * \sa ::client_only + */ unsigned char client_support; + + /** + * The direct-renderer (e.g., i965_dri.so) supports this extension. + * + * For cases where all of the infrastructure to support the extension is a + * required part of the loader/driver interface, this can default to Y. + * For most cases, extended functionality, usually in the form of DRI2 + * extensions, is necessary to support the extension. The loader will set + * the flag true if all the requirements are met. + * + * If the display is capable of direct rendering, ::direct_support is + * required for the extension to be enabled. + */ unsigned char direct_support; - unsigned char client_only; /** Is the extension client-side only? */ - unsigned char direct_only; /** Is the extension for direct - * contexts only? - */ + + /** + * The extension depends only on client support. + * + * This is for extensions like GLX_ARB_get_proc_address that are contained + * entirely in the client library. There is no dependency on the server or + * the direct-renderer. 
+ * + * These extensions will be enabled if ::client_support is set. + * + * \note + * An extension \b cannot be both client-only and direct-only because being + * direct-only implies a dependency on the direct renderer. + * + * \sa ::client_support, ::direct_only + */ + unsigned char client_only; + + /** + * The extension only functions with direct-rendering contexts + * + * The extension has no GLX protocol, and, therefore, no explicit + * dependency on the server. The functionality is contained entirely in + * the client library and the direct renderer. A few of the swap-related + * extensions are intended to behave this way. + * + * These extensions will be enabled if both ::client_support and + * ::direct_support are set. + * + * \note + * An extension \b cannot be both client-only and direct-only because being + * client-only implies that all functionality is outside the + * direct-renderer. + * + * \sa ::direct_support, ::client_only + */ + unsigned char direct_only; }; /* *INDENT-OFF* */ From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965: Expose and refactor brw_update_renderbuffer_surfaces( ) Message-ID: <20150429215500.79300761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c15e20d8f6f6d632ad55d444149c2a12d0dcc515 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c15e20d8f6f6d632ad55d444149c2a12d0dcc515 Author: Topi Pohjolainen Date: Thu Mar 19 11:09:54 2015 +0200 i965: Expose and refactor brw_update_renderbuffer_surfaces() Note that brw_update_renderbuffer_surfaces() already had a helper variable which was used in parallel to direct access of the current draw buffer of the context. 
Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_state.h | 5 +++ src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 51 +++++++++++++--------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index cfa67b6..83058b9 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -233,6 +233,11 @@ GLuint translate_tex_format(struct brw_context *brw, int brw_get_texture_swizzle(const struct gl_context *ctx, const struct gl_texture_object *t); +void brw_update_renderbuffer_surfaces(struct brw_context *brw, + const struct gl_framebuffer *fb, + uint32_t render_target_start, + uint32_t *surf_offset); + /* gen7_wm_surface_state.c */ uint32_t gen7_surface_tiling_mode(uint32_t tiling); uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d451940..25fb543 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -731,40 +731,49 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /** * Construct SURFACE_STATE objects for renderbuffers/draw buffers. 
*/ -static void -brw_update_renderbuffer_surfaces(struct brw_context *brw) +void +brw_update_renderbuffer_surfaces(struct brw_context *brw, + const struct gl_framebuffer *fb, + uint32_t render_target_start, + uint32_t *surf_offset) { - struct gl_context *ctx = &brw->ctx; - /* _NEW_BUFFERS */ - const struct gl_framebuffer *fb = ctx->DrawBuffer; GLuint i; - /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ - if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - const uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + i; + if (fb->_NumColorDrawBuffers >= 1) { + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + const uint32_t surf_index = render_target_start + i; - if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { - brw->wm.base.surf_offset[surf_index] = + if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) { + surf_offset[surf_index] = brw->vtbl.update_renderbuffer_surface( - brw, ctx->DrawBuffer->_ColorDrawBuffers[i], - ctx->DrawBuffer->MaxNumLayers > 0, i, surf_index); + brw, fb->_ColorDrawBuffers[i], + fb->MaxNumLayers > 0, i, surf_index); } else { brw->vtbl.emit_null_surface_state( brw, fb->Width, fb->Height, fb->Visual.samples, - &brw->wm.base.surf_offset[surf_index]); + &surf_offset[surf_index]); } } } else { - const uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start; - + const uint32_t surf_index = render_target_start; brw->vtbl.emit_null_surface_state( brw, fb->Width, fb->Height, fb->Visual.samples, - &brw->wm.base.surf_offset[surf_index]); + &surf_offset[surf_index]); } +} + +static void +update_renderbuffer_surfaces(struct brw_context *brw) +{ + const struct gl_context *ctx = &brw->ctx; + + /* _NEW_BUFFERS | _NEW_COLOR */ + const struct gl_framebuffer *fb = ctx->DrawBuffer; + brw_update_renderbuffer_surfaces( + brw, fb, + brw->wm.prog_data->binding_table.render_target_start, + brw->wm.base.surf_offset); 
brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } @@ -775,7 +784,7 @@ const struct brw_tracked_state brw_renderbuffer_surfaces = { .brw = BRW_NEW_BATCH | BRW_NEW_FS_PROG_DATA, }, - .emit = brw_update_renderbuffer_surfaces, + .emit = update_renderbuffer_surfaces, }; const struct brw_tracked_state gen6_renderbuffer_surfaces = { @@ -783,7 +792,7 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = { .mesa = _NEW_BUFFERS, .brw = BRW_NEW_BATCH, }, - .emit = brw_update_renderbuffer_surfaces, + .emit = update_renderbuffer_surfaces, }; From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965: Refactor sampler state setup Message-ID: <20150429215500.9C33A761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 02dbc79297203a063b91e6b5a0b81bda8aa48c19 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=02dbc79297203a063b91e6b5a0b81bda8aa48c19 Author: Topi Pohjolainen Date: Thu Apr 2 12:45:02 2015 +0300 i965: Refactor sampler state setup v2 (Matt): Moved * to the name. Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 60 ++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_state.h | 9 ++++ 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c index c78e2e3..22ccbfe 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -375,21 +375,16 @@ upload_default_color(struct brw_context *brw, * Sets the sampler state for a single unit based off of the sampler key * entry. 
*/ -static void +void brw_update_sampler_state(struct brw_context *brw, - int unit, + GLenum target, bool tex_cube_map_seamless, + GLfloat tex_unit_lod_bias, + mesa_format format, GLenum base_format, + bool is_integer_format, + const struct gl_sampler_object *sampler, uint32_t *sampler_state, uint32_t batch_offset_for_sampler_state) { - struct gl_context *ctx = &brw->ctx; - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - const struct gl_texture_object *texObj = texUnit->_Current; - const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); - - /* These don't use samplers at all. */ - if (texObj->Target == GL_TEXTURE_BUFFER) - return; - unsigned min_filter, mag_filter, mip_filter; /* Select min and mip filters. */ @@ -459,12 +454,12 @@ brw_update_sampler_state(struct brw_context *brw, unsigned wrap_t = translate_wrap_mode(brw, sampler->WrapT, either_nearest); unsigned wrap_r = translate_wrap_mode(brw, sampler->WrapR, either_nearest); - if (texObj->Target == GL_TEXTURE_CUBE_MAP || - texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { + if (target == GL_TEXTURE_CUBE_MAP || + target == GL_TEXTURE_CUBE_MAP_ARRAY) { /* Cube maps must use the same wrap mode for all three coordinate * dimensions. Prior to Haswell, only CUBE and CLAMP are valid. */ - if ((ctx->Texture.CubeMapSeamless || sampler->CubeMapSeamless) && + if ((tex_cube_map_seamless || sampler->CubeMapSeamless) && (sampler->MinFilter != GL_NEAREST || sampler->MagFilter != GL_NEAREST)) { wrap_s = BRW_TEXCOORDMODE_CUBE; @@ -475,7 +470,7 @@ brw_update_sampler_state(struct brw_context *brw, wrap_t = BRW_TEXCOORDMODE_CLAMP; wrap_r = BRW_TEXCOORDMODE_CLAMP; } - } else if (texObj->Target == GL_TEXTURE_1D) { + } else if (target == GL_TEXTURE_1D) { /* There's a bug in 1D texture sampling - it actually pays * attention to the wrap_t value, though it should not. 
* Override the wrap_t value here to GL_REPEAT to keep @@ -495,7 +490,7 @@ brw_update_sampler_state(struct brw_context *brw, const unsigned min_lod = U_FIXED(CLAMP(sampler->MinLod, 0, 13), lod_bits); const unsigned max_lod = U_FIXED(CLAMP(sampler->MaxLod, 0, 13), lod_bits); const int lod_bias = - S_FIXED(CLAMP(texUnit->LodBias + sampler->LodBias, -16, 15), lod_bits); + S_FIXED(CLAMP(tex_unit_lod_bias + sampler->LodBias, -16, 15), lod_bits); const unsigned base_level = U_FIXED(0, 1); /* Upload the border color if necessary. If not, just point it at @@ -506,14 +501,12 @@ brw_update_sampler_state(struct brw_context *brw, if (wrap_mode_needs_border_color(wrap_s) || wrap_mode_needs_border_color(wrap_t) || wrap_mode_needs_border_color(wrap_r)) { - const struct gl_texture_image *first_image = - texObj->Image[0][texObj->BaseLevel]; upload_default_color(brw, sampler, - first_image->TexFormat, first_image->_BaseFormat, - texObj->_IsIntegerFormat, &border_color_offset); + format, base_format, is_integer_format, + &border_color_offset); } - const bool non_normalized_coords = texObj->Target == GL_TEXTURE_RECTANGLE; + const bool non_normalized_coords = target == GL_TEXTURE_RECTANGLE; brw_emit_sampler_state(brw, sampler_state, @@ -528,6 +521,29 @@ brw_update_sampler_state(struct brw_context *brw, border_color_offset); } +static void +update_sampler_state(struct brw_context *brw, + int unit, + uint32_t *sampler_state, + uint32_t batch_offset_for_sampler_state) +{ + struct gl_context *ctx = &brw->ctx; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + const struct gl_texture_object *texObj = texUnit->_Current; + const struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + + /* These don't use samplers at all. 
*/ + if (texObj->Target == GL_TEXTURE_BUFFER) + return; + + struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; + brw_update_sampler_state(brw, texObj->Target, ctx->Texture.CubeMapSeamless, + texUnit->LodBias, + firstImage->TexFormat, firstImage->_BaseFormat, + texObj->_IsIntegerFormat, + sampler, + sampler_state, batch_offset_for_sampler_state); +} static void brw_upload_sampler_state_table(struct brw_context *brw, @@ -557,7 +573,7 @@ brw_upload_sampler_state_table(struct brw_context *brw, if (SamplersUsed & (1 << s)) { const unsigned unit = prog->SamplerUnits[s]; if (ctx->Texture.Unit[unit]._Current) { - brw_update_sampler_state(brw, unit, sampler_state, + update_sampler_state(brw, unit, sampler_state, batch_offset_for_sampler_state); } } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 8798369..ab067c3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -283,6 +283,15 @@ void brw_emit_sampler_state(struct brw_context *brw, bool non_normalized_coordinates, uint32_t border_color_offset); +void brw_update_sampler_state(struct brw_context *brw, + GLenum target, bool tex_cube_map_seamless, + GLfloat tex_unit_lod_bias, + mesa_format format, GLenum base_format, + bool is_integer_format, + const struct gl_sampler_object *sampler, + uint32_t *sampler_state, + uint32_t batch_offset_for_sampler_state); + /* gen6_sf_state.c */ void calculate_attr_overrides(const struct brw_context *brw, From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/blorp: Remove constant parameter Message-ID: <20150429215500.C7E6F761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d271a13ba31168e0de75d7d4c1d4d7a2e2fb136c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d271a13ba31168e0de75d7d4c1d4d7a2e2fb136c Author: Topi 
Pohjolainen Date: Sat Mar 14 10:21:33 2015 +0200 i965/blorp: Remove constant parameter This was still needed when we had support for blorp clears but now this is fixed to nop. Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 1 - src/mesa/drivers/dri/i965/brw_blorp.h | 8 -------- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 11 ----------- 3 files changed, 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 131e155..b0de55d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -162,7 +162,6 @@ brw_blorp_params::brw_blorp_params() y1(0), depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), - fast_clear_op(GEN7_FAST_CLEAR_OP_NONE), use_wm_prog(false) { color_write_disable[0] = false; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index ff68000..59aecab 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -208,13 +208,6 @@ struct brw_blorp_prog_data }; -enum gen7_fast_clear_op { - GEN7_FAST_CLEAR_OP_NONE, - GEN7_FAST_CLEAR_OP_FAST_CLEAR, - GEN7_FAST_CLEAR_OP_RESOLVE, -}; - - class brw_blorp_params { public: @@ -232,7 +225,6 @@ public: brw_blorp_surface_info src; brw_blorp_surface_info dst; enum gen6_hiz_op hiz_op; - enum gen7_fast_clear_op fast_clear_op; bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; bool color_write_disable[4]; diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index fb6a0dd..8215fe9 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -529,17 +529,6 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; } - switch (params->fast_clear_op) { - case GEN7_FAST_CLEAR_OP_FAST_CLEAR: - dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE; - 
break; - case GEN7_FAST_CLEAR_OP_RESOLVE: - dw4 |= GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE; - break; - default: - break; - } - BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); OUT_BATCH(params->use_wm_prog ? prog_offset : 0); From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/ps: Use SET_FIELD() for sampler count Message-ID: <20150429215500.4EB0D761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: f39846fb57c2b4d29b65a40019ba55219b062117 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f39846fb57c2b4d29b65a40019ba55219b062117 Author: Topi Pohjolainen Date: Wed Apr 29 20:35:45 2015 +0300 i965/ps: Use SET_FIELD() for sampler count The value is actually clamped to 0-16 as sample state pointer can be used to support more than 16 samplers. Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/gen7_wm_state.c | 5 +++-- src/mesa/drivers/dri/i965/gen8_ps_state.c | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bd3218a..7d55d85 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2259,6 +2259,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_SPF_MODE (1 << 31) # define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30) # define GEN7_PS_SAMPLER_COUNT_SHIFT 27 +# define GEN7_PS_SAMPLER_COUNT_MASK INTEL_MASK(29, 27) # define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 923414e..55a1acd 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -135,8 +135,9 @@ upload_ps_state(struct brw_context *brw) dw2 = dw4 = dw5 = ksp2 = 0; - dw2 |= - (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; + const unsigned sampler_count = + DIV_ROUND_UP(CLAMP(brw->wm.base.sampler_count, 0, 16), 4); + dw2 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); dw2 |= ((prog_data->base.binding_table.size_bytes / 4) << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT); diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 5f39e12..8481153 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -133,8 +133,9 @@ upload_ps_state(struct brw_context *brw) */ dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; - dw3 |= - (ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT; + const unsigned sampler_count = + DIV_ROUND_UP(CLAMP(brw->wm.base.sampler_count, 0, 16), 4); + dw3 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); /* BRW_NEW_FS_PROG_DATA */ dw3 |= From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965: Remove dependency to tex object in default color setup Message-ID: <20150429215500.912A4761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 47f32cb50d19145ed502e1fccd949d931c0cd392 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=47f32cb50d19145ed502e1fccd949d931c0cd392 Author: Topi Pohjolainen Date: Sat Apr 4 20:28:45 2015 +0300 i965: Remove dependency to tex object in default color setup Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_sampler_state.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c 
index c4bd949..c78e2e3 100644 --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c @@ -201,16 +201,13 @@ wrap_mode_needs_border_color(unsigned wrap_mode) static void upload_default_color(struct brw_context *brw, const struct gl_sampler_object *sampler, - int unit, + mesa_format format, GLenum base_format, + bool is_integer_format, uint32_t *sdc_offset) { - struct gl_context *ctx = &brw->ctx; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel]; union gl_color_union color; - switch (firstImage->_BaseFormat) { + switch (base_format) { case GL_DEPTH_COMPONENT: /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the @@ -257,7 +254,7 @@ upload_default_color(struct brw_context *brw, * where we've initialized the A channel to 1.0. We also have to set * the border color alpha to 1.0 in that case. */ - if (firstImage->_BaseFormat == GL_RGB) + if (base_format == GL_RGB) color.ui[3] = float_as_int(1.0); if (brw->gen >= 8) { @@ -269,7 +266,7 @@ upload_default_color(struct brw_context *brw, uint32_t *sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR, 4 * 4, 64, sdc_offset); memcpy(sdc, color.ui, 4 * 4); - } else if (brw->is_haswell && texObj->_IsIntegerFormat) { + } else if (brw->is_haswell && is_integer_format) { /* Haswell's integer border color support is completely insane: * SAMPLER_BORDER_COLOR_STATE is 20 DWords. The first four are * for float colors. 
The next 12 DWords are MBZ and only exist to @@ -283,7 +280,6 @@ upload_default_color(struct brw_context *brw, memset(sdc, 0, 20 * 4); sdc = &sdc[16]; - mesa_format format = firstImage->TexFormat; int bits_per_channel = _mesa_get_format_bits(format, GL_RED_BITS); /* From the Haswell PRM, "Command Reference: Structures", Page 36: @@ -314,7 +310,7 @@ upload_default_color(struct brw_context *brw, ((uint16_t *) sdc)[5] = c[3]; /* A -> DWord 3, bits 31:16 */ break; case 32: - if (firstImage->_BaseFormat == GL_RG) { + if (base_format == GL_RG) { /* Careful inspection of the tables reveals that for RG32 formats, * the green channel needs to go where blue normally belongs. */ @@ -510,7 +506,11 @@ brw_update_sampler_state(struct brw_context *brw, if (wrap_mode_needs_border_color(wrap_s) || wrap_mode_needs_border_color(wrap_t) || wrap_mode_needs_border_color(wrap_r)) { - upload_default_color(brw, sampler, unit, &border_color_offset); + const struct gl_texture_image *first_image = + texObj->Image[0][texObj->BaseLevel]; + upload_default_color(brw, sampler, + first_image->TexFormat, first_image->_BaseFormat, + texObj->_IsIntegerFormat, &border_color_offset); } const bool non_normalized_coords = texObj->Target == GL_TEXTURE_RECTANGLE; From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/ps/gen8: Refactor state uploading Message-ID: <20150429215500.B3135761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: fea168f49584333aeeabad2d2b0dc6aaee86f881 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fea168f49584333aeeabad2d2b0dc6aaee86f881 Author: Topi Pohjolainen Date: Mon Mar 2 12:31:17 2015 +0200 i965/ps/gen8: Refactor state uploading v2: Use SET_FIELD() for sampler count, and for that reason added GEN7_PS_SAMPLER_COUNT_MASK. 
Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_state.h | 12 +++++ src/mesa/drivers/dri/i965/gen8_ps_state.c | 72 ++++++++++++++++++----------- 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index f8d56b9..13d541b 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -265,6 +265,18 @@ void gen7_set_surface_mcs_info(struct brw_context *brw, void gen7_check_surface_setup(uint32_t *surf, bool is_render_target); void gen7_init_vtable_surface_functions(struct brw_context *brw); +/* gen8_ps_state.c */ +void gen8_upload_ps_state(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_stage_state *stage_state, + const struct brw_wm_prog_data *prog_data, + uint32_t fast_clear_op); + +void gen8_upload_ps_extra(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_wm_prog_data *prog_data, + bool multisampled_fbo); + /* gen7_sol_state.c */ void gen7_upload_3dstate_so_decl_list(struct brw_context *brw, const struct brw_vue_map *vue_map); diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8481153..85ad3b6 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -27,15 +27,13 @@ #include "brw_defines.h" #include "intel_batchbuffer.h" -static void -upload_ps_extra(struct brw_context *brw) +void +gen8_upload_ps_extra(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_wm_prog_data *prog_data, + bool multisampled_fbo) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; uint32_t dw1 = 
0; dw1 |= GEN8_PSX_PIXEL_SHADER_VALID; @@ -47,16 +45,14 @@ upload_ps_extra(struct brw_context *brw) if (prog_data->num_varying_inputs != 0) dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE; - if (fp->program.Base.InputsRead & VARYING_BIT_POS) + if (fp->Base.InputsRead & VARYING_BIT_POS) dw1 |= GEN8_PSX_USES_SOURCE_DEPTH | GEN8_PSX_USES_SOURCE_W; - /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ - bool multisampled_fbo = brw->num_samples > 1; if (multisampled_fbo && - _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1) + _mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1) dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; - if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) + if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK; if (prog_data->uses_omask) @@ -68,6 +64,20 @@ upload_ps_extra(struct brw_context *brw) ADVANCE_BATCH(); } +static void +upload_ps_extra(struct brw_context *brw) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; + /* BRW_NEW_NUM_SAMPLES | _NEW_MULTISAMPLE */ + const bool multisampled_fbo = brw->num_samples > 1; + + gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo); +} + const struct brw_tracked_state gen8_ps_extra = { .dirty = { .mesa = _NEW_MULTISAMPLE, @@ -118,15 +128,16 @@ const struct brw_tracked_state gen8_wm_state = { .emit = upload_wm_state, }; -static void -upload_ps_state(struct brw_context *brw) +void +gen8_upload_ps_state(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_stage_state *stage_state, + const struct brw_wm_prog_data *prog_data, + uint32_t fast_clear_op) { struct gl_context *ctx = &brw->ctx; uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; - 
/* Initialize the execution mask with VMask. Otherwise, derivatives are * incorrect for subspans where some of the pixels are unlit. We believe * the bit just didn't take effect in previous generations. @@ -134,7 +145,7 @@ upload_ps_state(struct brw_context *brw) dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; const unsigned sampler_count = - DIV_ROUND_UP(CLAMP(brw->wm.base.sampler_count, 0, 16), 4); + DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); dw3 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); /* BRW_NEW_FS_PROG_DATA */ @@ -171,12 +182,12 @@ upload_ps_state(struct brw_context *brw) * We only require XY sample offsets. So, this recommendation doesn't * look useful at the moment. We might need this in future. */ - if (brw->wm.prog_data->uses_pos_offset) + if (prog_data->uses_pos_offset) dw6 |= GEN7_PS_POSOFFSET_SAMPLE; else dw6 |= GEN7_PS_POSOFFSET_NONE; - dw6 |= brw->wm.fast_clear_op; + dw6 |= fast_clear_op; /* _NEW_MULTISAMPLE * In case of non 1x per sample shading, only one of SIMD8 and SIMD16 @@ -185,7 +196,7 @@ upload_ps_state(struct brw_context *brw) * better performance than 'SIMD8 only' dispatch. 
*/ int min_invocations_per_fragment = - _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false); + _mesa_get_min_invocations_per_fragment(ctx, fp, false); assert(min_invocations_per_fragment >= 1); if (prog_data->prog_offset_16 || prog_data->no_8) { @@ -196,19 +207,19 @@ upload_ps_state(struct brw_context *brw) GEN7_PS_DISPATCH_START_GRF_SHIFT_0); dw7 |= (prog_data->dispatch_grf_start_reg_16 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); - ksp0 = brw->wm.base.prog_offset; - ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16; + ksp0 = stage_state->prog_offset; + ksp2 = stage_state->prog_offset + prog_data->prog_offset_16; } else { dw7 |= (prog_data->dispatch_grf_start_reg_16 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); - ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16; + ksp0 = stage_state->prog_offset + prog_data->prog_offset_16; } } else { dw6 |= GEN7_PS_8_DISPATCH_ENABLE; dw7 |= (prog_data->base.dispatch_grf_start_reg << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); - ksp0 = brw->wm.base.prog_offset; + ksp0 = stage_state->prog_offset; } BEGIN_BATCH(12); @@ -217,7 +228,7 @@ upload_ps_state(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(dw3); if (prog_data->base.total_scratch) { - OUT_RELOC64(brw->wm.base.scratch_bo, + OUT_RELOC64(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, ffs(prog_data->base.total_scratch) - 11); } else { @@ -233,6 +244,15 @@ upload_ps_state(struct brw_context *brw) ADVANCE_BATCH(); } +static void +upload_ps_state(struct brw_context *brw) +{ + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; + gen8_upload_ps_state(brw, brw->fragment_program, &brw->wm.base, prog_data, + brw->wm.fast_clear_op); +} + const struct brw_tracked_state gen8_ps_state = { .dirty = { .mesa = _NEW_MULTISAMPLE, From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: 
Mesa (master): i965/gen7/blorp: Remove unused arguments Message-ID: <20150429215500.E913E761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dce1972945a4568c181011880e0336a2a14909ec URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dce1972945a4568c181011880e0336a2a14909ec Author: Topi Pohjolainen Date: Fri Jan 30 11:30:34 2015 +0200 i965/gen7/blorp: Remove unused arguments Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 75 +++++++++++------------------- 1 file changed, 28 insertions(+), 47 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index d841346..c9e7cb7 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -48,8 +48,7 @@ * valid. */ static void -gen7_blorp_emit_urb_config(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_urb_config(struct brw_context *brw) { unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 
32 : 16; gen7_emit_push_constant_state(brw, @@ -73,7 +72,6 @@ gen7_blorp_emit_urb_config(struct brw_context *brw, /* 3DSTATE_BLEND_STATE_POINTERS */ static void gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, - const brw_blorp_params *params, uint32_t cc_blend_state_offset) { BEGIN_BATCH(2); @@ -86,7 +84,6 @@ gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, /* 3DSTATE_CC_STATE_POINTERS */ static void gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, - const brw_blorp_params *params, uint32_t cc_state_offset) { BEGIN_BATCH(2); @@ -96,8 +93,7 @@ gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, } static void -gen7_blorp_emit_cc_viewport(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_cc_viewport(struct brw_context *brw) { struct brw_cc_viewport *ccv; uint32_t cc_vp_offset; @@ -121,7 +117,6 @@ gen7_blorp_emit_cc_viewport(struct brw_context *brw, */ static void gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, - const brw_blorp_params *params, uint32_t depthstencil_offset) { BEGIN_BATCH(2); @@ -136,7 +131,6 @@ gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, */ static uint32_t gen7_blorp_emit_surface_state(struct brw_context *brw, - const brw_blorp_params *params, const brw_blorp_surface_info *surface, uint32_t read_domains, uint32_t write_domain, bool is_render_target) @@ -228,8 +222,7 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, * Disable vertex shader. */ static void -gen7_blorp_emit_vs_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_vs_disable(struct brw_context *brw) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); @@ -257,8 +250,7 @@ gen7_blorp_emit_vs_disable(struct brw_context *brw, * Disable the hull shader. 
*/ static void -gen7_blorp_emit_hs_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_hs_disable(struct brw_context *brw) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2)); @@ -287,8 +279,7 @@ gen7_blorp_emit_hs_disable(struct brw_context *brw, * Disable the tesselation engine. */ static void -gen7_blorp_emit_te_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_te_disable(struct brw_context *brw) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); @@ -304,8 +295,7 @@ gen7_blorp_emit_te_disable(struct brw_context *brw, * Disable the domain shader. */ static void -gen7_blorp_emit_ds_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_ds_disable(struct brw_context *brw) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2)); @@ -332,8 +322,7 @@ gen7_blorp_emit_ds_disable(struct brw_context *brw, * Disable the geometry shader. */ static void -gen7_blorp_emit_gs_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_gs_disable(struct brw_context *brw) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2)); @@ -377,8 +366,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw, * Disable streamout. 
*/ static void -gen7_blorp_emit_streamout_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_streamout_disable(struct brw_context *brw) { BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); @@ -544,7 +532,6 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, static void gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, - const brw_blorp_params *params, uint32_t wm_bind_bo_offset) { BEGIN_BATCH(2); @@ -556,7 +543,6 @@ gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, static void gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, - const brw_blorp_params *params, uint32_t sampler_offset) { BEGIN_BATCH(2); @@ -568,7 +554,6 @@ gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, static void gen7_blorp_emit_constant_ps(struct brw_context *brw, - const brw_blorp_params *params, uint32_t wm_push_const_offset) { const uint8_t mocs = GEN7_MOCS_L3; @@ -595,8 +580,7 @@ gen7_blorp_emit_constant_ps(struct brw_context *brw, } static void -gen7_blorp_emit_constant_ps_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_constant_ps_disable(struct brw_context *brw) { BEGIN_BATCH(7); OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2)); @@ -708,8 +692,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, static void -gen7_blorp_emit_depth_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen7_blorp_emit_depth_disable(struct brw_context *brw) { intel_emit_depth_stall_flushes(brw); @@ -800,29 +783,28 @@ gen7_blorp_exec(struct brw_context *brw, (1 << params->dst.num_samples) - 1 : 1); gen6_blorp_emit_state_base_address(brw, params); gen6_blorp_emit_vertices(brw, params); - gen7_blorp_emit_urb_config(brw, params); + gen7_blorp_emit_urb_config(brw); if (params->use_wm_prog) { cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); cc_state_offset = gen6_blorp_emit_cc_state(brw, params); - 
gen7_blorp_emit_blend_state_pointer(brw, params, cc_blend_state_offset); - gen7_blorp_emit_cc_state_pointer(brw, params, cc_state_offset); + gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); + gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); } depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); - gen7_blorp_emit_depth_stencil_state_pointers(brw, params, - depthstencil_offset); + gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset); if (params->use_wm_prog) { uint32_t wm_surf_offset_renderbuffer; uint32_t wm_surf_offset_texture = 0; wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); intel_miptree_used_for_rendering(params->dst.mt); wm_surf_offset_renderbuffer = - gen7_blorp_emit_surface_state(brw, params, &params->dst, + gen7_blorp_emit_surface_state(brw, &params->dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, true /* is_render_target */); if (params->src.mt) { wm_surf_offset_texture = - gen7_blorp_emit_surface_state(brw, params, &params->src, + gen7_blorp_emit_surface_state(brw, &params->src, I915_GEM_DOMAIN_SAMPLER, 0, false /* is_render_target */); } @@ -833,30 +815,29 @@ gen7_blorp_exec(struct brw_context *brw, sampler_offset = gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); } - gen7_blorp_emit_vs_disable(brw, params); - gen7_blorp_emit_hs_disable(brw, params); - gen7_blorp_emit_te_disable(brw, params); - gen7_blorp_emit_ds_disable(brw, params); - gen7_blorp_emit_gs_disable(brw, params); - gen7_blorp_emit_streamout_disable(brw, params); + gen7_blorp_emit_vs_disable(brw); + gen7_blorp_emit_hs_disable(brw); + gen7_blorp_emit_te_disable(brw); + gen7_blorp_emit_ds_disable(brw); + gen7_blorp_emit_gs_disable(brw); + gen7_blorp_emit_streamout_disable(brw); gen6_blorp_emit_clip_disable(brw, params); gen7_blorp_emit_sf_config(brw, params); gen7_blorp_emit_wm_config(brw, params, prog_data); if (params->use_wm_prog) { - gen7_blorp_emit_binding_table_pointers_ps(brw, params, - 
wm_bind_bo_offset); - gen7_blorp_emit_sampler_state_pointers_ps(brw, params, sampler_offset); - gen7_blorp_emit_constant_ps(brw, params, wm_push_const_offset); + gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); + gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); + gen7_blorp_emit_constant_ps(brw, wm_push_const_offset); } else { - gen7_blorp_emit_constant_ps_disable(brw, params); + gen7_blorp_emit_constant_ps_disable(brw); } gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data); - gen7_blorp_emit_cc_viewport(brw, params); + gen7_blorp_emit_cc_viewport(brw); if (params->depth.mt) gen7_blorp_emit_depth_stencil_config(brw, params); else - gen7_blorp_emit_depth_disable(brw, params); + gen7_blorp_emit_depth_disable(brw); gen7_blorp_emit_clear_params(brw, params); gen6_blorp_emit_drawing_rectangle(brw, params); gen7_blorp_emit_primitive(brw, params); From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:01 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:01 -0700 (PDT) Subject: Mesa (master): i965/blorp: Allow blend state to be set for multiple render targets Message-ID: <20150429215501.15EBB761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 91daf9f09bac41c84c6868a56e0d538cc59cc334 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=91daf9f09bac41c84c6868a56e0d538cc59cc334 Author: Topi Pohjolainen Date: Thu Mar 26 16:43:31 2015 +0200 i965/blorp: Allow blend state to be set for multiple render targets Original blorp writes only one buffer per shader invocation. Once the launch mechanism is shared with glsl-based programs there will be need for supporting multiple render targets. Also drop the always constant color write disable settings. 
Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 10 ++++------ src/mesa/drivers/dri/i965/brw_blorp.h | 5 +++-- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 22 +++++++++++----------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 0c0cd2b..8f82851 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -155,7 +155,8 @@ brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x, } -brw_blorp_params::brw_blorp_params(unsigned num_varyings) +brw_blorp_params::brw_blorp_params(unsigned num_varyings, + unsigned num_draw_buffers) : x0(0), y0(0), x1(0), @@ -163,12 +164,9 @@ brw_blorp_params::brw_blorp_params(unsigned num_varyings) depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), use_wm_prog(false), - num_varyings(num_varyings) + num_varyings(num_varyings), + num_draw_buffers(num_draw_buffers) { - color_write_disable[0] = false; - color_write_disable[1] = false; - color_write_disable[2] = false; - color_write_disable[3] = false; } extern "C" { diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 0ba3891..c9957a6 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -211,7 +211,8 @@ struct brw_blorp_prog_data class brw_blorp_params { public: - explicit brw_blorp_params(unsigned num_varyings = 0); + brw_blorp_params(unsigned num_varyings = 0, + unsigned num_draw_buffers = 1); virtual uint32_t get_wm_prog(struct brw_context *brw, brw_blorp_prog_data **prog_data) const = 0; @@ -227,8 +228,8 @@ public: enum gen6_hiz_op hiz_op; bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; - bool color_write_disable[4]; const unsigned num_varyings; + const unsigned num_draw_buffers; }; diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 
405a3e8..bfd2001 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -246,21 +246,21 @@ gen6_blorp_emit_blend_state(struct brw_context *brw, { uint32_t cc_blend_state_offset; + assume(params->num_draw_buffers); + + const unsigned size = params->num_draw_buffers * + sizeof(struct gen6_blend_state); struct gen6_blend_state *blend = (struct gen6_blend_state *) - brw_state_batch(brw, AUB_TRACE_BLEND_STATE, - sizeof(struct gen6_blend_state), 64, + brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, &cc_blend_state_offset); - memset(blend, 0, sizeof(*blend)); - - blend->blend1.pre_blend_clamp_enable = 1; - blend->blend1.post_blend_clamp_enable = 1; - blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; + memset(blend, 0, size); - blend->blend1.write_disable_r = params->color_write_disable[0]; - blend->blend1.write_disable_g = params->color_write_disable[1]; - blend->blend1.write_disable_b = params->color_write_disable[2]; - blend->blend1.write_disable_a = params->color_write_disable[3]; + for (unsigned i = 0; i < params->num_draw_buffers; ++i) { + blend[i].blend1.pre_blend_clamp_enable = 1; + blend[i].blend1.post_blend_clamp_enable = 1; + blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; + } return cc_blend_state_offset; } From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:01 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:01 -0700 (PDT) Subject: Mesa (master): i965/blorp: Prepare drawing rectangle for flipped coordinates Message-ID: <20150429215501.2C61F761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 13670e8bade296c5c5846dc08615563ad51685e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=13670e8bade296c5c5846dc08615563ad51685e4 Author: Topi Pohjolainen Date: Sun Mar 29 21:52:02 2015 +0300 i965/blorp: Prepare drawing rectangle for flipped coordinates Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- 
src/mesa/drivers/dri/i965/gen6_blorp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index ed08898..b6a3d78 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -950,8 +950,8 @@ gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); OUT_BATCH(0); - OUT_BATCH(((params->x1 - 1) & 0xffff) | - ((params->y1 - 1) << 16)); + OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | + ((MAX2(params->y1, params->y0) - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); } From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965: Refactor rb surface setup to allow caller to store offsets Message-ID: <20150429215500.6BC0B761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: c8b0d890c0b7e6aa5ed326b94ac30dcb7278e7ea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8b0d890c0b7e6aa5ed326b94ac30dcb7278e7ea Author: Topi Pohjolainen Date: Tue Mar 17 13:09:16 2015 +0200 i965: Refactor rb surface setup to allow caller to store offsets Notice that in gen7_wm_surface_state.c there is also indentation change in the surrounding code removing tabs. 
v2 (Matt): Fixed whitespace: tabs -> spaces Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_context.h | 8 ++--- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 38 +++++++++++---------- src/mesa/drivers/dri/i965/gen6_surface_state.c | 25 +++++++------- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 28 +++++++-------- src/mesa/drivers/dri/i965/gen8_surface_state.c | 18 +++++----- 5 files changed, 59 insertions(+), 58 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8db1028..e2f26f5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -967,10 +967,10 @@ struct brw_context unsigned unit, uint32_t *surf_offset, bool for_gather); - void (*update_renderbuffer_surface)(struct brw_context *brw, - struct gl_renderbuffer *rb, - bool layered, - unsigned unit); + uint32_t (*update_renderbuffer_surface)(struct brw_context *brw, + struct gl_renderbuffer *rb, + bool layered, unsigned unit, + uint32_t surf_index); void (*emit_texture_surface_state)(struct brw_context *brw, struct intel_mipmap_tree *mt, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 161d140..d451940 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -626,11 +626,11 @@ brw_emit_null_surface_state(struct brw_context *brw, * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. 
*/ -static void +static uint32_t brw_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - bool layered, - unsigned int unit) + struct gl_renderbuffer *rb, + bool layered, unsigned unit, + uint32_t surf_index) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -638,11 +638,10 @@ brw_update_renderbuffer_surface(struct brw_context *brw, uint32_t *surf; uint32_t tile_x, tile_y; uint32_t format = 0; + uint32_t offset; /* _NEW_BUFFERS */ mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); /* BRW_NEW_FS_PROG_DATA */ - uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + unit; assert(!layered); @@ -663,8 +662,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, intel_miptree_used_for_rendering(irb->mt); - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, - &brw->wm.base.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset); format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { @@ -721,11 +719,13 @@ brw_update_renderbuffer_surface(struct brw_context *brw, } drm_intel_bo_emit_reloc(brw->batch.bo, - brw->wm.base.surf_offset[surf_index] + 4, - mt->bo, - surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); + offset + 4, + mt->bo, + surf[1] - mt->bo->offset64, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + + return offset; } /** @@ -743,13 +743,15 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + const uint32_t surf_index = + brw->wm.prog_data->binding_table.render_target_start + i; + if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { - brw->vtbl.update_renderbuffer_surface(brw, 
ctx->DrawBuffer->_ColorDrawBuffers[i], - ctx->DrawBuffer->MaxNumLayers > 0, i); + brw->wm.base.surf_offset[surf_index] = + brw->vtbl.update_renderbuffer_surface( + brw, ctx->DrawBuffer->_ColorDrawBuffers[i], + ctx->DrawBuffer->MaxNumLayers > 0, i, surf_index); } else { - const uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + i; - brw->vtbl.emit_null_surface_state( brw, fb->Width, fb->Height, fb->Visual.samples, &brw->wm.base.surf_offset[surf_index]); diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c index fadc353..03e913a 100644 --- a/src/mesa/drivers/dri/i965/gen6_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c @@ -45,17 +45,18 @@ * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ -static void +static uint32_t gen6_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - bool layered, - unsigned int unit) + bool layered, unsigned unit /* unused */, + uint32_t surf_index) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_mipmap_tree *mt = irb->mt; uint32_t *surf; uint32_t format = 0; + uint32_t offset; /* _NEW_BUFFERS */ mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); uint32_t surftype; @@ -63,13 +64,9 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, const GLenum gl_target = rb->TexImage ? 
rb->TexImage->TexObject->Target : GL_TEXTURE_2D; - uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + unit; - intel_miptree_used_for_rendering(irb->mt); - surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, - &brw->wm.base.surf_offset[surf_index]); + surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset); format = brw->render_target_format[rb_format]; if (unlikely(!brw->format_supported_as_render_target[rb_format])) { @@ -131,11 +128,13 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, surf[5] = (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0); drm_intel_bo_emit_reloc(brw->batch.bo, - brw->wm.base.surf_offset[surf_index] + 4, - mt->bo, - surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); + offset + 4, + mt->bo, + surf[1] - mt->bo->offset64, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + + return offset; } void diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 4b8503c..15ab2b0 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -448,11 +448,11 @@ gen7_emit_null_surface_state(struct brw_context *brw, * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. 
*/ -static void +static uint32_t gen7_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - bool layered, - unsigned int unit) + struct gl_renderbuffer *rb, + bool layered, unsigned unit /* unused */, + uint32_t surf_index) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -464,17 +464,15 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, bool is_array = false; int depth = MAX2(irb->layer_count, 1); const uint8_t mocs = GEN7_MOCS_L3; + uint32_t offset; int min_array_element = irb->mt_layer / MAX2(mt->num_samples, 1); GLenum gl_target = rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D; - uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + unit; - uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, - &brw->wm.base.surf_offset[surf_index]); + &offset); memset(surf, 0, 8 * 4); intel_miptree_used_for_rendering(irb->mt); @@ -539,7 +537,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, (depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT; if (irb->mt->mcs_mt) { - gen7_set_surface_mcs_info(brw, surf, brw->wm.base.surf_offset[surf_index], + gen7_set_surface_mcs_info(brw, surf, offset, irb->mt->mcs_mt, true /* is RT */); } @@ -553,13 +551,15 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, } drm_intel_bo_emit_reloc(brw->batch.bo, - brw->wm.base.surf_offset[surf_index] + 4, - mt->bo, - surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); + offset + 4, + mt->bo, + surf[1] - mt->bo->offset64, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); gen7_check_surface_setup(surf, true /* is_render_target */); + + return offset; } void diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index f347065..d0c2d80 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c 
@@ -324,11 +324,11 @@ gen8_emit_null_surface_state(struct brw_context *brw, * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ -static void +static uint32_t gen8_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - bool layered, - unsigned unit) + bool layered, unsigned unit /* unused */, + uint32_t surf_index) { struct gl_context *ctx = &brw->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); @@ -341,14 +341,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, uint32_t tiling = mt->tiling; uint32_t format = 0; uint32_t surf_type; + uint32_t offset; bool is_array = false; int depth = MAX2(irb->layer_count, 1); const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ? irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1)); GLenum gl_target = rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D; - uint32_t surf_index = - brw->wm.prog_data->binding_table.render_target_start + unit; /* FINISHME: Use PTE MOCS on Skylake. */ uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE; @@ -393,8 +392,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, aux_mode = GEN8_SURFACE_AUX_MODE_MCS; } - uint32_t *surf = - allocate_surface_state(brw, &brw->wm.base.surf_offset[surf_index]); + uint32_t *surf = allocate_surface_state(brw, &offset); surf[0] = (surf_type << BRW_SURFACE_TYPE_SHIFT) | (is_array ? 
GEN7_SURFACE_IS_ARRAY : 0) | @@ -439,7 +437,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, if (aux_mt) { *((uint64_t *) &surf[10]) = aux_mt->bo->offset64; drm_intel_bo_emit_reloc(brw->batch.bo, - brw->wm.base.surf_offset[surf_index] + 10 * 4, + offset + 10 * 4, aux_mt->bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } else { @@ -449,11 +447,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, surf[12] = 0; drm_intel_bo_emit_reloc(brw->batch.bo, - brw->wm.base.surf_offset[surf_index] + 8 * 4, + offset + 8 * 4, mt->bo, mt->offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); + + return offset; } void From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965: Refactor and expose brw_upload_binding_table() Message-ID: <20150429215500.84312761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 21071afc431bb17419c353151544518be7daf05f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21071afc431bb17419c353151544518be7daf05f Author: Topi Pohjolainen Date: Thu Mar 19 10:42:49 2015 +0200 i965: Refactor and expose brw_upload_binding_table() Read and write parts of the state stage are also split into explicit arguments allowing future patches to use constant program data. 
v2 (Ken): s/BRW_NEW_WM_PROG_DATA/BRW_NEW_FS_PROG_DATA/ Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_binding_tables.c | 21 ++++++++++++++------- src/mesa/drivers/dri/i965/brw_state.h | 7 +++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index 459165a..98ff0dd 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -50,15 +50,13 @@ * This copies brw_stage_state::surf_offset[] into the indirect state section * of the batchbuffer (allocated by brw_state_batch()). */ -static void +void brw_upload_binding_table(struct brw_context *brw, uint32_t packet_name, GLbitfield brw_new_binding_table, + const struct brw_stage_prog_data *prog_data, struct brw_stage_state *stage_state) { - /* BRW_NEW_*_PROG_DATA */ - struct brw_stage_prog_data *prog_data = stage_state->prog_data; - if (prog_data->binding_table.size_bytes == 0) { /* There are no surfaces; skip making the binding table altogether. 
*/ if (stage_state->bind_bo_offset == 0 && brw->gen < 9) @@ -103,9 +101,12 @@ brw_upload_binding_table(struct brw_context *brw, static void brw_vs_upload_binding_table(struct brw_context *brw) { + /* BRW_NEW_VS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data; brw_upload_binding_table(brw, _3DSTATE_BINDING_TABLE_POINTERS_VS, - BRW_NEW_VS_BINDING_TABLE, &brw->vs.base); + BRW_NEW_VS_BINDING_TABLE, prog_data, + &brw->vs.base); } const struct brw_tracked_state brw_vs_binding_table = { @@ -124,9 +125,12 @@ const struct brw_tracked_state brw_vs_binding_table = { static void brw_upload_wm_binding_table(struct brw_context *brw) { + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data; brw_upload_binding_table(brw, _3DSTATE_BINDING_TABLE_POINTERS_PS, - BRW_NEW_PS_BINDING_TABLE, &brw->wm.base); + BRW_NEW_PS_BINDING_TABLE, prog_data, + &brw->wm.base); } const struct brw_tracked_state brw_wm_binding_table = { @@ -147,9 +151,12 @@ brw_gs_upload_binding_table(struct brw_context *brw) if (brw->geometry_program == NULL) return; + /* BRW_NEW_GS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; brw_upload_binding_table(brw, _3DSTATE_BINDING_TABLE_POINTERS_GS, - BRW_NEW_GS_BINDING_TABLE, &brw->gs.base); + BRW_NEW_GS_BINDING_TABLE, prog_data, + &brw->gs.base); } const struct brw_tracked_state brw_gs_binding_table = { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 83058b9..8798369 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -159,6 +159,13 @@ brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags) (brw->ctx.NewDriverState & brw_flags)) != 0; } +/* brw_binding_tables.c */ +void brw_upload_binding_table(struct brw_context *brw, + uint32_t packet_name, + GLbitfield brw_new_binding_table, + const struct brw_stage_prog_data *prog_data, + struct 
brw_stage_state *stage_state); + /* brw_misc_state.c */ void brw_upload_invariant_state(struct brw_context *brw); uint32_t From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/blorp: Allow caller to provide sampler settings Message-ID: <20150429215500.DEF64761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4de0bef7f438147091a7489728c4d187c6efbbc3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4de0bef7f438147091a7489728c4d187c6efbbc3 Author: Topi Pohjolainen Date: Fri Feb 27 21:59:56 2015 +0200 i965/blorp: Allow caller to provide sampler settings v2 (Ken): s/use_unorm_coords/non_normalized_coords/ Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.h | 4 +++- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 15 +++++++++------ src/mesa/drivers/dri/i965/gen7_blorp.cpp | 3 ++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 59aecab..6aaae65 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -415,7 +415,9 @@ gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, uint32_t gen6_blorp_emit_sampler_state(struct brw_context *brw, - const brw_blorp_params *params); + unsigned tex_filter, unsigned max_lod, + bool non_normalized_coords); + /** \} */ #endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 6c139ec..14e073b 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -455,7 +455,8 @@ gen6_blorp_emit_binding_table(struct brw_context *brw, */ uint32_t gen6_blorp_emit_sampler_state(struct brw_context *brw, - const brw_blorp_params *params) + unsigned tex_filter, unsigned max_lod, + bool 
non_normalized_coords) { uint32_t sampler_offset; uint32_t *sampler_state = (uint32_t *) @@ -476,8 +477,8 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw, brw_emit_sampler_state(brw, sampler_state, sampler_offset, - BRW_MAPFILTER_LINEAR, /* min filter */ - BRW_MAPFILTER_LINEAR, /* mag filter */ + tex_filter, /* min filter */ + tex_filter, /* mag filter */ BRW_MIPFILTER_NONE, BRW_ANISORATIO_2, address_rounding, @@ -485,11 +486,11 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw, BRW_TEXCOORDMODE_CLAMP, BRW_TEXCOORDMODE_CLAMP, 0, /* min LOD */ - 0, /* max LOD */ + max_lod, 0, /* LOD bias */ 0, /* base miplevel */ 0, /* shadow function */ - true, /* non-normalized coordinates */ + non_normalized_coords, 0); /* border color offset - unused */ return sampler_offset; @@ -1059,7 +1060,9 @@ gen6_blorp_exec(struct brw_context *brw, gen6_blorp_emit_binding_table(brw, params, wm_surf_offset_renderbuffer, wm_surf_offset_texture); - sampler_offset = gen6_blorp_emit_sampler_state(brw, params); + sampler_offset = + gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); + gen6_blorp_emit_sampler_state_pointers(brw, params, sampler_offset); } gen6_blorp_emit_vs_disable(brw, params); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 8215fe9..d841346 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -830,7 +830,8 @@ gen7_blorp_exec(struct brw_context *brw, gen6_blorp_emit_binding_table(brw, params, wm_surf_offset_renderbuffer, wm_surf_offset_texture); - sampler_offset = gen6_blorp_emit_sampler_state(brw, params); + sampler_offset = + gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); } gen7_blorp_emit_vs_disable(brw, params); gen7_blorp_emit_hs_disable(brw, params); From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:01 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:01 -0700 (PDT) 
Subject: Mesa (master): i965/blorp: Prepare for attributes other than render position Message-ID: <20150429215501.09A50761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7fb0db4dd18e49d3ccdb872f7ed174740301f3a2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7fb0db4dd18e49d3ccdb872f7ed174740301f3a2 Author: Topi Pohjolainen Date: Fri Feb 27 11:45:34 2015 +0200 i965/blorp: Prepare for attributes other than render position Note that the magic number of one in gen7 logic is replaced by BRW_SF_URB_ENTRY_READ_OFFSET ( == 1 also) for clarity. On gen6 the change from zero to one (BRW_SF_URB_ENTRY_READ_OFFSET) has no effect for native blorp as blorp doesn't use any additional attributes. In fact, regular pipeline setup always uses BRW_SF_URB_ENTRY_READ_OFFSET even when there are no additional attributes. Hence the change makes the two (blorp and regular) consistent. Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 5 +++-- src/mesa/drivers/dri/i965/brw_blorp.h | 3 ++- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 5 +++-- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 6 ++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index b0de55d..0c0cd2b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -155,14 +155,15 @@ brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x, } -brw_blorp_params::brw_blorp_params() +brw_blorp_params::brw_blorp_params(unsigned num_varyings) : x0(0), y0(0), x1(0), y1(0), depth_format(0), hiz_op(GEN6_HIZ_OP_NONE), - use_wm_prog(false) + use_wm_prog(false), + num_varyings(num_varyings) { color_write_disable[0] = false; color_write_disable[1] = false; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index f277dee..0ba3891 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ 
b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -211,7 +211,7 @@ struct brw_blorp_prog_data class brw_blorp_params { public: - brw_blorp_params(); + explicit brw_blorp_params(unsigned num_varyings = 0); virtual uint32_t get_wm_prog(struct brw_context *brw, brw_blorp_prog_data **prog_data) const = 0; @@ -228,6 +228,7 @@ public: bool use_wm_prog; brw_blorp_wm_push_constants wm_push_consts; bool color_write_disable[4]; + const unsigned num_varyings; }; diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 2fe2840..405a3e8 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -634,9 +634,10 @@ gen6_blorp_emit_sf_config(struct brw_context *brw, { BEGIN_BATCH(20); OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); - OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + OUT_BATCH(params->num_varyings << GEN6_SF_NUM_OUTPUTS_SHIFT | 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + BRW_SF_URB_ENTRY_READ_OFFSET << + GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); OUT_BATCH(0); /* dw2 */ OUT_BATCH(params->dst.num_samples > 1 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); for (int i = 0; i < 16; ++i) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 12f515d..3065a4c 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -415,9 +415,11 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, { BEGIN_BATCH(14); OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); - OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ + OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | + params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - 0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); + BRW_SF_URB_ENTRY_READ_OFFSET << + GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); for (int i = 0; i < 12; ++i) OUT_BATCH(0); ADVANCE_BATCH(); From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/gen8: Use constant pointers for reading miptree details Message-ID: <20150429215500.5F0FE761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d6c83c9d863f9f13e46584b93cbab6d3a3885aea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d6c83c9d863f9f13e46584b93cbab6d3a3885aea Author: Topi Pohjolainen Date: Sun Mar 1 22:23:33 2015 +0200 i965/gen8: Use constant pointers for reading miptree details Reviewed-by: Matt Turner Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index b8ef353..f347065 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -70,7 +70,7 @@ surface_tiling_mode(uint32_t tiling) } static unsigned -vertical_alignment(struct intel_mipmap_tree *mt) 
+vertical_alignment(const struct intel_mipmap_tree *mt) { switch (mt->align_h) { case 4: @@ -85,7 +85,7 @@ vertical_alignment(struct intel_mipmap_tree *mt) } static unsigned -horizontal_alignment(struct intel_mipmap_tree *mt) +horizontal_alignment(const struct intel_mipmap_tree *mt) { switch (mt->align_w) { case 4: From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/ps/gen7: Refactor state uploading Message-ID: <20150429215500.A904C761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 4047420ec47488f2cdd7511cbeea95201b620480 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4047420ec47488f2cdd7511cbeea95201b620480 Author: Topi Pohjolainen Date: Fri Feb 13 11:20:05 2015 +0200 i965/ps/gen7: Refactor state uploading Now the uploading depends only on the input parameters instead of consulting the current gl-state. v2: Rebased on top of sampler count clamping Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_state.h | 9 +++++ src/mesa/drivers/dri/i965/gen7_wm_state.c | 56 ++++++++++++++++++----------- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index ab067c3..f8d56b9 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -245,6 +245,15 @@ void brw_update_renderbuffer_surfaces(struct brw_context *brw, uint32_t render_target_start, uint32_t *surf_offset); +/* gen7_wm_state.c */ +void +gen7_upload_ps_state(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_stage_state *stage_state, + const struct brw_wm_prog_data *prog_data, + bool enable_dual_src_blend, unsigned sample_mask, + unsigned fast_clear_op); + /* gen7_wm_surface_state.c */ uint32_t 
gen7_surface_tiling_mode(uint32_t tiling); uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l); diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 55a1acd..b918275 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -122,21 +122,23 @@ const struct brw_tracked_state gen7_wm_state = { .emit = upload_wm_state, }; -static void -upload_ps_state(struct brw_context *brw) +void +gen7_upload_ps_state(struct brw_context *brw, + const struct gl_fragment_program *fp, + const struct brw_stage_state *stage_state, + const struct brw_wm_prog_data *prog_data, + bool enable_dual_src_blend, unsigned sample_mask, + unsigned fast_clear_op) { struct gl_context *ctx = &brw->ctx; uint32_t dw2, dw4, dw5, ksp0, ksp2; const int max_threads_shift = brw->is_haswell ? HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; - dw2 = dw4 = dw5 = ksp2 = 0; const unsigned sampler_count = - DIV_ROUND_UP(CLAMP(brw->wm.base.sampler_count, 0, 16), 4); + DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); dw2 |= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT); dw2 |= ((prog_data->base.binding_table.size_bytes / 4) << @@ -149,7 +151,7 @@ upload_ps_state(struct brw_context *brw) * in 3DSTATE_SAMPLE_MASK; the values should match. 
*/ /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ if (brw->is_haswell) - dw4 |= SET_FIELD(gen6_determine_sample_mask(brw), HSW_PS_SAMPLE_MASK); + dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; @@ -182,16 +184,11 @@ upload_ps_state(struct brw_context *brw) else dw4 |= GEN7_PS_POSOFFSET_NONE; - /* BRW_NEW_FS_PROG_DATA | _NEW_COLOR - * - * The hardware wedges if you have this bit set but don't turn on any dual + /* The hardware wedges if you have this bit set but don't turn on any dual * source blend factors. */ - if (prog_data->dual_src_blend && - (ctx->Color.BlendEnabled & 1) && - ctx->Color.Blend[0]._UsesDualSrc) { + if (enable_dual_src_blend) dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE; - } /* BRW_NEW_FS_PROG_DATA */ if (prog_data->num_varying_inputs != 0) @@ -203,7 +200,7 @@ upload_ps_state(struct brw_context *brw) * better performance than 'SIMD8 only' dispatch. */ int min_inv_per_frag = - _mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false); + _mesa_get_min_invocations_per_fragment(ctx, fp, false); assert(min_inv_per_frag >= 1); if (prog_data->prog_offset_16 || prog_data->no_8) { @@ -214,22 +211,22 @@ upload_ps_state(struct brw_context *brw) GEN7_PS_DISPATCH_START_GRF_SHIFT_0); dw5 |= (prog_data->dispatch_grf_start_reg_16 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2); - ksp0 = brw->wm.base.prog_offset; - ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16; + ksp0 = stage_state->prog_offset; + ksp2 = stage_state->prog_offset + prog_data->prog_offset_16; } else { dw5 |= (prog_data->dispatch_grf_start_reg_16 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); - ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16; + ksp0 = stage_state->prog_offset + prog_data->prog_offset_16; } } else { dw4 |= GEN7_PS_8_DISPATCH_ENABLE; dw5 |= (prog_data->base.dispatch_grf_start_reg << GEN7_PS_DISPATCH_START_GRF_SHIFT_0); - ksp0 = brw->wm.base.prog_offset; + ksp0 = stage_state->prog_offset; } - dw4 |= 
brw->wm.fast_clear_op; + dw4 |= fast_clear_op; BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); @@ -249,6 +246,25 @@ upload_ps_state(struct brw_context *brw) ADVANCE_BATCH(); } +static void +upload_ps_state(struct brw_context *brw) +{ + /* BRW_NEW_FS_PROG_DATA */ + const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; + const struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_FS_PROG_DATA | _NEW_COLOR */ + const bool enable_dual_src_blend = prog_data->dual_src_blend && + (ctx->Color.BlendEnabled & 1) && + ctx->Color.Blend[0]._UsesDualSrc; + /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ + const unsigned sample_mask = + brw->is_haswell ? gen6_determine_sample_mask(brw) : 0; + + gen7_upload_ps_state(brw, brw->fragment_program, &brw->wm.base, prog_data, + enable_dual_src_blend, sample_mask, + brw->wm.fast_clear_op); +} + const struct brw_tracked_state gen7_ps_state = { .dirty = { .mesa = _NEW_BUFFERS | From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/gen8: Expose state base address setup Message-ID: <20150429215500.BD303761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: d7e49fba9a48b5f90c0ce8b7d0c0588545090a7f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7e49fba9a48b5f90c0ce8b7d0c0588545090a7f Author: Topi Pohjolainen Date: Mon Mar 2 11:29:05 2015 +0200 i965/gen8: Expose state base address setup Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_state.h | 3 +++ src/mesa/drivers/dri/i965/gen8_misc_state.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 13d541b..a2127d1 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -171,6 +171,9 @@ void brw_upload_invariant_state(struct 
brw_context *brw); uint32_t brw_depthbuffer_format(struct brw_context *brw); +/* gen8_misc_state.c */ +void gen8_upload_state_base_address(struct brw_context *brw); + /*********************************************************************** * brw_state.c diff --git a/src/mesa/drivers/dri/i965/gen8_misc_state.c b/src/mesa/drivers/dri/i965/gen8_misc_state.c index 88e425f..b20038e 100644 --- a/src/mesa/drivers/dri/i965/gen8_misc_state.c +++ b/src/mesa/drivers/dri/i965/gen8_misc_state.c @@ -29,7 +29,7 @@ /** * Define the base addresses which some state is referenced from. */ -static void upload_state_base_address(struct brw_context *brw) +void gen8_upload_state_base_address(struct brw_context *brw) { uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB; int pkt_len = brw->gen >= 9 ? 19 : 16; @@ -78,5 +78,5 @@ const struct brw_tracked_state gen8_state_base_address = { .brw = BRW_NEW_BATCH | BRW_NEW_PROGRAM_CACHE, }, - .emit = upload_state_base_address + .emit = gen8_upload_state_base_address }; From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/blorp: Refactor vertex buffer state setup Message-ID: <20150429215500.D24FF761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: bfdacac86cf82a1ae8d76e3282b8842f08a21c31 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bfdacac86cf82a1ae8d76e3282b8842f08a21c31 Author: Topi Pohjolainen Date: Fri Feb 27 11:17:05 2015 +0200 i965/blorp: Refactor vertex buffer state setup Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 60 +++++++++++++++++------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index e45705a..6c139ec 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -93,6 +93,37 @@ gen6_blorp_emit_state_base_address(struct brw_context *brw, ADVANCE_BATCH(); } +static void +gen6_blorp_emit_vertex_buffer_state(struct brw_context *brw, + unsigned num_elems, + unsigned vbo_size, + uint32_t vertex_offset) +{ + /* 3DSTATE_VERTEX_BUFFERS */ + const int num_buffers = 1; + const int batch_length = 1 + 4 * num_buffers; + + uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | + (num_elems * sizeof(float)) << BRW_VB0_PITCH_SHIFT; + + if (brw->gen >= 7) + dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; + + if (brw->gen == 7) + dw0 |= GEN7_MOCS_L3 << 16; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + OUT_BATCH(dw0); + /* start address */ + OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset); + /* end address */ + OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, + vertex_offset + vbo_size - 1); + OUT_BATCH(0); + ADVANCE_BATCH(); +} void gen6_blorp_emit_vertices(struct brw_context *brw, @@ -144,32 +175,9 @@ gen6_blorp_emit_vertices(struct brw_context *brw, memcpy(vertex_data, vertices, GEN6_BLORP_VBO_SIZE); } - /* 3DSTATE_VERTEX_BUFFERS */ - { - const int num_buffers = 1; - const int batch_length = 1 + 4 * num_buffers; - - uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | - (GEN6_BLORP_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT; - - if (brw->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - if (brw->gen == 7) - dw0 |= GEN7_MOCS_L3 << 16; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); - OUT_BATCH(dw0); - /* start address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset); - /* end address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset + GEN6_BLORP_VBO_SIZE - 1); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + gen6_blorp_emit_vertex_buffer_state(brw, GEN6_BLORP_NUM_VUE_ELEMS, + GEN6_BLORP_VBO_SIZE, + vertex_offset); /* 3DSTATE_VERTEX_ELEMENTS * 
From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:00 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:00 -0700 (PDT) Subject: Mesa (master): i965/blorp: Remove unused arguments Message-ID: <20150429215500.F3683761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 25ce6c6943576e22d8d00049578d0e6cc5feea07 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=25ce6c6943576e22d8d00049578d0e6cc5feea07 Author: Topi Pohjolainen Date: Fri Jan 30 11:37:54 2015 +0200 i965/blorp: Remove unused arguments Reviewed-by: Kenneth Graunke Signed-off-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.h | 7 ++----- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 20 +++++++------------- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 6 +++--- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 6aaae65..f277dee 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -380,8 +380,7 @@ gen6_blorp_emit_blend_state(struct brw_context *brw, const brw_blorp_params *params); uint32_t -gen6_blorp_emit_cc_state(struct brw_context *brw, - const brw_blorp_params *params); +gen6_blorp_emit_cc_state(struct brw_context *brw); uint32_t gen6_blorp_emit_wm_constants(struct brw_context *brw, @@ -393,7 +392,6 @@ gen6_blorp_emit_vs_disable(struct brw_context *brw, uint32_t gen6_blorp_emit_binding_table(struct brw_context *brw, - const brw_blorp_params *params, uint32_t wm_surf_offset_renderbuffer, uint32_t wm_surf_offset_texture); @@ -406,8 +404,7 @@ gen6_blorp_emit_gs_disable(struct brw_context *brw, const brw_blorp_params *params); void -gen6_blorp_emit_clip_disable(struct brw_context *brw, - const brw_blorp_params *params); +gen6_blorp_emit_clip_disable(struct brw_context *brw); void gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 14e073b..2fe2840 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -268,8 +268,7 @@ gen6_blorp_emit_blend_state(struct brw_context *brw, /* CC_STATE */ uint32_t -gen6_blorp_emit_cc_state(struct brw_context *brw, - const brw_blorp_params *params) +gen6_blorp_emit_cc_state(struct brw_context *brw) { uint32_t cc_state_offset; @@ -431,7 +430,6 @@ gen6_blorp_emit_surface_state(struct brw_context *brw, /* BINDING_TABLE. See brw_wm_binding_table(). */ uint32_t gen6_blorp_emit_binding_table(struct brw_context *brw, - const brw_blorp_params *params, uint32_t wm_surf_offset_renderbuffer, uint32_t wm_surf_offset_texture) { @@ -502,7 +500,6 @@ gen6_blorp_emit_sampler_state(struct brw_context *brw, */ static void gen6_blorp_emit_sampler_state_pointers(struct brw_context *brw, - const brw_blorp_params *params, uint32_t sampler_offset) { BEGIN_BATCH(4); @@ -602,8 +599,7 @@ gen6_blorp_emit_gs_disable(struct brw_context *brw, * output, but does spare a few electrons. 
*/ void -gen6_blorp_emit_clip_disable(struct brw_context *brw, - const brw_blorp_params *params) +gen6_blorp_emit_clip_disable(struct brw_context *brw) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); @@ -767,7 +763,6 @@ gen6_blorp_emit_constant_ps_disable(struct brw_context *brw, */ static void gen6_blorp_emit_binding_table_pointers(struct brw_context *brw, - const brw_blorp_params *params, uint32_t wm_bind_bo_offset) { BEGIN_BATCH(4); @@ -1036,7 +1031,7 @@ gen6_blorp_exec(struct brw_context *brw, gen6_blorp_emit_urb_config(brw, params); if (params->use_wm_prog) { cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); - cc_state_offset = gen6_blorp_emit_cc_state(brw, params); + cc_state_offset = gen6_blorp_emit_cc_state(brw); } depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset, @@ -1057,17 +1052,16 @@ gen6_blorp_exec(struct brw_context *brw, I915_GEM_DOMAIN_SAMPLER, 0); } wm_bind_bo_offset = - gen6_blorp_emit_binding_table(brw, params, + gen6_blorp_emit_binding_table(brw, wm_surf_offset_renderbuffer, wm_surf_offset_texture); sampler_offset = gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); - - gen6_blorp_emit_sampler_state_pointers(brw, params, sampler_offset); + gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset); } gen6_blorp_emit_vs_disable(brw, params); gen6_blorp_emit_gs_disable(brw, params); - gen6_blorp_emit_clip_disable(brw, params); + gen6_blorp_emit_clip_disable(brw); gen6_blorp_emit_sf_config(brw, params); if (params->use_wm_prog) gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset); @@ -1075,7 +1069,7 @@ gen6_blorp_exec(struct brw_context *brw, gen6_blorp_emit_constant_ps_disable(brw, params); gen6_blorp_emit_wm_config(brw, params, prog_offset, prog_data); if (params->use_wm_prog) - gen6_blorp_emit_binding_table_pointers(brw, params, wm_bind_bo_offset); + gen6_blorp_emit_binding_table_pointers(brw, 
wm_bind_bo_offset); gen6_blorp_emit_viewport_state(brw, params); if (params->depth.mt) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index c9e7cb7..12f515d 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -786,7 +786,7 @@ gen7_blorp_exec(struct brw_context *brw, gen7_blorp_emit_urb_config(brw); if (params->use_wm_prog) { cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); - cc_state_offset = gen6_blorp_emit_cc_state(brw, params); + cc_state_offset = gen6_blorp_emit_cc_state(brw); gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); } @@ -809,7 +809,7 @@ gen7_blorp_exec(struct brw_context *brw, false /* is_render_target */); } wm_bind_bo_offset = - gen6_blorp_emit_binding_table(brw, params, + gen6_blorp_emit_binding_table(brw, wm_surf_offset_renderbuffer, wm_surf_offset_texture); sampler_offset = @@ -821,7 +821,7 @@ gen7_blorp_exec(struct brw_context *brw, gen7_blorp_emit_ds_disable(brw); gen7_blorp_emit_gs_disable(brw); gen7_blorp_emit_streamout_disable(brw); - gen6_blorp_emit_clip_disable(brw, params); + gen6_blorp_emit_clip_disable(brw); gen7_blorp_emit_sf_config(brw, params); gen7_blorp_emit_wm_config(brw, params, prog_data); if (params->use_wm_prog) { From tpohjola at kemper.freedesktop.org Wed Apr 29 21:55:01 2015 From: tpohjola at kemper.freedesktop.org (Topi Pohjolainen) Date: Wed, 29 Apr 2015 14:55:01 -0700 (PDT) Subject: Mesa (master): i965/blorp: Add support for layered rendering Message-ID: <20150429215501.2202F761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: dfd896699d9f640518c0fbafb0352f454d5fc466 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfd896699d9f640518c0fbafb0352f454d5fc466 Author: Topi Pohjolainen Date: Fri Mar 27 16:25:56 2015 +0200 i965/blorp: Add support for layered rendering Reviewed-by: Kenneth Graunke Signed-off-by: Topi 
Pohjolainen --- src/mesa/drivers/dri/i965/brw_blorp.cpp | 6 ++++-- src/mesa/drivers/dri/i965/brw_blorp.h | 4 +++- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 2 +- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 8f82851..b404869 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -156,7 +156,8 @@ brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x, brw_blorp_params::brw_blorp_params(unsigned num_varyings, - unsigned num_draw_buffers) + unsigned num_draw_buffers, + unsigned num_layers) : x0(0), y0(0), x1(0), @@ -165,7 +166,8 @@ brw_blorp_params::brw_blorp_params(unsigned num_varyings, hiz_op(GEN6_HIZ_OP_NONE), use_wm_prog(false), num_varyings(num_varyings), - num_draw_buffers(num_draw_buffers) + num_draw_buffers(num_draw_buffers), + num_layers(num_layers) { } diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index c9957a6..dd28d81 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -212,7 +212,8 @@ class brw_blorp_params { public: brw_blorp_params(unsigned num_varyings = 0, - unsigned num_draw_buffers = 1); + unsigned num_draw_buffers = 1, + unsigned num_layers = 1); virtual uint32_t get_wm_prog(struct brw_context *brw, brw_blorp_prog_data **prog_data) const = 0; @@ -230,6 +231,7 @@ public: brw_blorp_wm_push_constants wm_push_consts; const unsigned num_varyings; const unsigned num_draw_buffers; + const unsigned num_layers; }; diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index bfd2001..ed08898 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -992,7 +992,7 @@ gen6_blorp_emit_primitive(struct brw_context *brw, GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); OUT_BATCH(3); /* 
vertex count per instance */ OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ + OUT_BATCH(params->num_layers); /* instance count */ OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 3065a4c..2bdc82b 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -753,7 +753,7 @@ gen7_blorp_emit_primitive(struct brw_context *brw, _3DPRIM_RECTLIST); OUT_BATCH(3); /* vertex count per instance */ OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ + OUT_BATCH(params->num_layers); /* instance count */ OUT_BATCH(0); OUT_BATCH(0); ADVANCE_BATCH(); From imirkin at kemper.freedesktop.org Wed Apr 29 22:04:16 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 29 Apr 2015 15:04:16 -0700 (PDT) Subject: Mesa (master): gk110/ir: fix set with a register dest to not auto-set the abs flag Message-ID: <20150429220416.7C99C761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 515ac907e68ae1485bd9c65d7351dfb3c3d1e33f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=515ac907e68ae1485bd9c65d7351dfb3c3d1e33f Author: Ilia Mirkin Date: Wed Apr 29 18:01:53 2015 -0400 gk110/ir: fix set with a register dest to not auto-set the abs flag This was causing src0 to always have the absolute value flag set. 
Signed-off-by: Ilia Mirkin Cc: mesa-stable at lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index a73bee2..d7c6b80 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -971,7 +971,7 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i) code[0] |= 0x1c; } else { switch (i->sType) { - case TYPE_F32: op2 = 0x000; op1 = 0x820; break; + case TYPE_F32: op2 = 0x000; op1 = 0x800; break; case TYPE_F64: op2 = 0x080; op1 = 0x900; break; default: op2 = 0x1a8; From imirkin at kemper.freedesktop.org Thu Apr 30 03:35:42 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 29 Apr 2015 20:35:42 -0700 (PDT) Subject: Mesa (master): nv50/ir: fix asFlow() const helper for OP_JOIN Message-ID: <20150430033542.48A80761EA@kemper.freedesktop.org> Module: Mesa Branch: master Commit: db269ae495425849804fb1d05cfe42b0d3d304b3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=db269ae495425849804fb1d05cfe42b0d3d304b3 Author: Ilia Mirkin Date: Wed Apr 29 23:33:27 2015 -0400 nv50/ir: fix asFlow() const helper for OP_JOIN Signed-off-by: Ilia Mirkin Cc: mesa-stable at lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h index 255324f..e465f24 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h @@ -302,7 +302,7 @@ FlowInstruction *Instruction::asFlow() const FlowInstruction *Instruction::asFlow() const { - if (op >= OP_BRA && op <= OP_JOINAT) + if (op >= OP_BRA && op <= OP_JOIN) return 
static_cast<const FlowInstruction *>(this); return NULL; } From imirkin at kemper.freedesktop.org Thu Apr 30 03:35:42 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 29 Apr 2015 20:35:42 -0700 (PDT) Subject: Mesa (master): nvc0/ir: fix predicated PFETCH emission Message-ID: <20150430033542.41886761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a9d08a250ada5fbd4e3f78f8e4119ec295d692cf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9d08a250ada5fbd4e3f78f8e4119ec295d692cf Author: Ilia Mirkin Date: Wed Apr 29 23:05:44 2015 -0400 nvc0/ir: fix predicated PFETCH emission src1 would contain the predicate, which would get emitted as a register source by an undiscerning srcId helper. Work around this in the same way as in emitTEX. Signed-off-by: Ilia Mirkin Cc: mesa-stable at lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 4 +++- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index d7c6b80..a6e6c1f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1333,8 +1333,10 @@ CodeEmitterGK110::emitPFETCH(const Instruction *i) emitPredicate(i); + const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) + defId(i->def(0), 2); - srcId(i->src(1), 10); + srcId(i, 1, 10); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 1a4f6e0..4ad098e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1495,8 +1495,10 @@ CodeEmitterNVC0::emitPFETCH(const Instruction *i) emitPredicate(i); + const int src1 = (i->predSrc == 1) ?
2 : 1; // if predSrc == 1, !srcExists(2) + defId(i->def(0), 14); - srcId(i->src(1), 20); + srcId(i, 1, 20); } void From imirkin at kemper.freedesktop.org Thu Apr 30 06:03:57 2015 From: imirkin at kemper.freedesktop.org (Ilia Mirkin) Date: Wed, 29 Apr 2015 23:03:57 -0700 (PDT) Subject: Mesa (master): nvc0/ir: fix predicated PFETCH for real Message-ID: <20150430060357.B3897761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 33f0d1138d6ffa4596d3deda68fa5ba9a3d7cf86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33f0d1138d6ffa4596d3deda68fa5ba9a3d7cf86 Author: Ilia Mirkin Date: Thu Apr 30 02:01:22 2015 -0400 nvc0/ir: fix predicated PFETCH for real Commit a9d08a250 accidentally didn't make use of the new src1 variable. Use it. Signed-off-by: Ilia Mirkin Cc: mesa-stable at lists.freedesktop.org --- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 2 +- src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index a6e6c1f..6bb9620 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1336,7 +1336,7 @@ CodeEmitterGK110::emitPFETCH(const Instruction *i) const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) defId(i->def(0), 2); - srcId(i, 1, 10); + srcId(i, src1, 10); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 4ad098e..d9aed34 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -1498,7 +1498,7 @@ CodeEmitterNVC0::emitPFETCH(const Instruction *i) const int src1 = (i->predSrc == 1) ? 
2 : 1; // if predSrc == 1, !srcExists(2) defId(i->def(0), 14); - srcId(i, 1, 20); + srcId(i, src1, 20); } void From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): dri_interface: add an interface for fences Message-ID: <20150430124315.A2035761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: b02a5bf3ba04608d50916a4ca00261461280d548 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b02a5bf3ba04608d50916a4ca00261461280d548 Author: Marek Olšák Date: Fri Apr 10 10:43:26 2015 +0200 dri_interface: add an interface for fences --- include/GL/internal/dri_interface.h | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index eb7da23..c827bb6 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -85,6 +85,7 @@ typedef struct __DRIdri2ExtensionRec __DRIdri2Extension; typedef struct __DRIdri2LoaderExtensionRec __DRIdri2LoaderExtension; typedef struct __DRI2flushExtensionRec __DRI2flushExtension; typedef struct __DRI2throttleExtensionRec __DRI2throttleExtension; +typedef struct __DRI2fenceExtensionRec __DRI2fenceExtension; typedef struct __DRIimageLoaderExtensionRec __DRIimageLoaderExtension; @@ -339,6 +340,65 @@ struct __DRI2throttleExtensionRec { enum __DRI2throttleReason reason); }; + +/** + * Extension for fences / synchronization objects. + */ + +#define __DRI2_FENCE "DRI2_Fence" +#define __DRI2_FENCE_VERSION 1 + +#define __DRI2_FENCE_TIMEOUT_INFINITE 0xffffffffffffffffllu + +#define __DRI2_FENCE_FLAG_FLUSH_COMMANDS (1 << 0) + +struct __DRI2fenceExtensionRec { + __DRIextension base; + + /** + * Create and insert a fence into the command stream of the context. + */ + void *(*create_fence)(__DRIcontext *ctx); + + /** + * Get a fence associated with the OpenCL event object.
+ * This can be NULL, meaning that OpenCL interoperability is not supported. + */ + void *(*get_fence_from_cl_event)(__DRIscreen *screen, intptr_t cl_event); + + /** + * Destroy a fence. + */ + void (*destroy_fence)(__DRIscreen *screen, void *fence); + + /** + * This function waits and doesn't return until the fence is signalled + * or the timeout expires. It returns true if the fence has been signaled. + * + * \param ctx the context where commands are flushed + * \param fence the fence + * \param flags a combination of __DRI2_FENCE_FLAG_xxx flags + * \param timeout the timeout in ns or __DRI2_FENCE_TIMEOUT_INFINITE + */ + GLboolean (*client_wait_sync)(__DRIcontext *ctx, void *fence, + unsigned flags, uint64_t timeout); + + /** + * This function enqueues a wait command into the command stream of + * the context and then returns. When the execution reaches the wait + * command, no further execution will be done in the context until + * the fence is signaled. This is a no-op if the device doesn't support + * parallel execution of contexts. + * + * \param ctx the context where the waiting is done + * \param fence the fence + * \param flags a combination of __DRI2_FENCE_FLAG_xxx flags that make + * sense with this function (right now there are none) + */ + void (*server_wait_sync)(__DRIcontext *ctx, void *fence, unsigned flags); +}; + + /*@}*/ /** From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): mesa: add GL_OES_EGL_sync Message-ID: <20150430124315.AC716761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 592ee249a139a46168cd8e3335039ce28e8a2c39 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=592ee249a139a46168cd8e3335039ce28e8a2c39 Author: Marek Olšák Date: Thu Apr 9 23:26:14 2015 +0200 mesa: add GL_OES_EGL_sync This is an empty extension whose presence means that EGL sync objects can be used with ES contexts.
--- src/mesa/main/extensions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index 3d4965c..9be8993 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -298,6 +298,7 @@ static const struct extension extension_table[] = { { "GL_OES_depth_texture", o(ARB_depth_texture), ES2, 2006 }, { "GL_OES_depth_texture_cube_map", o(OES_depth_texture_cube_map), ES2, 2012 }, { "GL_OES_draw_texture", o(OES_draw_texture), ES1, 2004 }, + { "GL_OES_EGL_sync", o(dummy_true), ES1 | ES2, 2010 }, /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ { "GL_OES_EGL_image", o(OES_EGL_image), GL | ES1 | ES2, 2006 }, { "GL_OES_EGL_image_external", o(OES_EGL_image_external), ES1 | ES2, 2010 }, From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): egl/dri2: implement EGL_KHR_fence_sync Message-ID: <20150430124315.BA2EA761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 9a0bda2430f10fbf43c64573412d97b6cc38e5d7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9a0bda2430f10fbf43c64573412d97b6cc38e5d7 Author: Marek Olšák Date: Fri Apr 10 10:56:02 2015 +0200 egl/dri2: implement EGL_KHR_fence_sync --- src/egl/drivers/dri2/egl_dri2.c | 95 +++++++++++++++++++++++++++++++++++++++ src/egl/drivers/dri2/egl_dri2.h | 9 ++++ src/egl/main/eglapi.c | 34 +++++++++++--- 3 files changed, 133 insertions(+), 5 deletions(-) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 14b9be9..97175ae 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -51,6 +51,7 @@ #endif #include "egl_dri2.h" +#include "../util/u_atomic.h" const __DRIuseInvalidateExtension use_invalidate = { .base = { __DRI_USE_INVALIDATE, 1 } @@ -527,6 +528,10 @@ dri2_setup_screen(_EGLDisplay *disp)
disp->Extensions.EXT_create_context_robustness = EGL_TRUE; } + if (dri2_dpy->fence) { + disp->Extensions.KHR_fence_sync = EGL_TRUE; + } + if (dri2_dpy->image) { if (dri2_dpy->image->base.version >= 10 && dri2_dpy->image->getCapabilities != NULL) { @@ -620,6 +625,9 @@ dri2_create_screen(_EGLDisplay *disp) if (strcmp(extensions[i]->name, __DRI2_CONFIG_QUERY) == 0) { dri2_dpy->config = (__DRI2configQueryExtension *) extensions[i]; } + if (strcmp(extensions[i]->name, __DRI2_FENCE) == 0) { + dri2_dpy->fence = (__DRI2fenceExtension *) extensions[i]; + } } } else { assert(dri2_dpy->swrast); @@ -2181,6 +2189,90 @@ dri2_query_wayland_buffer_wl(_EGLDriver *drv, _EGLDisplay *disp, #endif static void +dri2_egl_ref_sync(struct dri2_egl_sync *sync) +{ + p_atomic_inc(&sync->refcount); +} + +static void +dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy, + struct dri2_egl_sync *dri2_sync) +{ + if (p_atomic_dec_zero(&dri2_sync->refcount)) { + dri2_dpy->fence->destroy_fence(dri2_dpy->dri_screen, dri2_sync->fence); + free(dri2_sync); + } +} + +static _EGLSync * +dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, + EGLenum type, const EGLint *attrib_list) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + struct dri2_egl_sync *dri2_sync; + + dri2_sync = calloc(1, sizeof(struct dri2_egl_sync)); + if (!dri2_sync) { + _eglError(EGL_BAD_ALLOC, "eglCreateSyncKHR"); + return NULL; + } + + if (!_eglInitSync(&dri2_sync->base, dpy, type, attrib_list)) { + free(dri2_sync); + return NULL; + } + + switch (type) { + case EGL_SYNC_FENCE_KHR: + dri2_sync->fence = dri2_dpy->fence->create_fence(dri2_ctx->dri_context); + break; + } + + p_atomic_set(&dri2_sync->refcount, 1); + return &dri2_sync->base; +} + +static EGLBoolean +dri2_destroy_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) +{ + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); + struct dri2_egl_sync 
*dri2_sync = dri2_egl_sync(sync); + + dri2_egl_unref_sync(dri2_dpy, dri2_sync); + return EGL_TRUE; +} + +static EGLint +dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, + EGLint flags, EGLTimeKHR timeout) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync); + unsigned wait_flags = 0; + EGLint ret = EGL_CONDITION_SATISFIED_KHR; + + if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR) + wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS; + + /* the sync object should take a reference while waiting */ + dri2_egl_ref_sync(dri2_sync); + + if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context, + dri2_sync->fence, wait_flags, + timeout)) + dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; + else + ret = EGL_TIMEOUT_EXPIRED_KHR; + + dri2_egl_unref_sync(dri2_dpy, dri2_sync); + return ret; +} + +static void dri2_unload(_EGLDriver *drv) { struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv); @@ -2292,6 +2384,9 @@ _eglBuiltInDriverDRI2(const char *args) dri2_drv->base.API.QueryWaylandBufferWL = dri2_query_wayland_buffer_wl; #endif dri2_drv->base.API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium; + dri2_drv->base.API.CreateSyncKHR = dri2_create_sync; + dri2_drv->base.API.ClientWaitSyncKHR = dri2_client_wait_sync; + dri2_drv->base.API.DestroySyncKHR = dri2_destroy_sync; dri2_drv->base.Name = "DRI2"; dri2_drv->base.Unload = dri2_unload; diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h index 167b3b1..371fb4a 100644 --- a/src/egl/drivers/dri2/egl_dri2.h +++ b/src/egl/drivers/dri2/egl_dri2.h @@ -74,6 +74,7 @@ #include "egllog.h" #include "eglsurface.h" #include "eglimage.h" +#include "eglsync.h" #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) @@ -164,6 +165,7 @@ struct dri2_egl_display const __DRIimageExtension *image; const __DRIrobustnessExtension 
*robustness; const __DRI2configQueryExtension *config; + const __DRI2fenceExtension *fence; int fd; int own_device; @@ -283,6 +285,12 @@ struct dri2_egl_image __DRIimage *dri_image; }; +struct dri2_egl_sync { + _EGLSync base; + int refcount; + void *fence; +}; + /* From xmlpool/options.h, user exposed so should be stable */ #define DRI_CONF_VBLANK_NEVER 0 #define DRI_CONF_VBLANK_DEF_INTERVAL_0 1 @@ -292,6 +300,7 @@ struct dri2_egl_image /* standard typecasts */ _EGL_DRIVER_STANDARD_TYPECASTS(dri2_egl) _EGL_DRIVER_TYPECAST(dri2_egl_image, _EGLImage, obj) +_EGL_DRIVER_TYPECAST(dri2_egl_sync, _EGLSync, obj) extern const __DRIimageLookupExtension image_lookup_extension; extern const __DRIuseInvalidateExtension use_invalidate; diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 8b7b9be..dd972b2 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -1657,13 +1657,30 @@ EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) { _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLContext *ctx = _eglGetCurrentContext(); _EGLDriver *drv; _EGLSync *sync; EGLSyncKHR ret; _EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv); - if (!disp->Extensions.KHR_reusable_sync) - RETURN_EGL_EVAL(disp, EGL_NO_SYNC_KHR); + + /* return an error if the client API doesn't support GL_OES_EGL_sync */ + if (!ctx || ctx->Resource.Display != dpy || + ctx->ClientAPI != EGL_OPENGL_ES_API) + RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SYNC_KHR); + + switch (type) { + case EGL_SYNC_FENCE_KHR: + if (!disp->Extensions.KHR_fence_sync) + RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + break; + case EGL_SYNC_REUSABLE_KHR: + if (!disp->Extensions.KHR_reusable_sync) + RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + break; + default: + RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + } sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list); ret = (sync) ? 
_eglLinkSync(sync) : EGL_NO_SYNC_KHR; @@ -1681,7 +1698,8 @@ eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) EGLBoolean ret; _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); - assert(disp->Extensions.KHR_reusable_sync); + assert(disp->Extensions.KHR_reusable_sync || + disp->Extensions.KHR_fence_sync); _eglUnlinkSync(s); ret = drv->API.DestroySyncKHR(drv, disp, s); @@ -1699,7 +1717,12 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t EGLint ret; _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); - assert(disp->Extensions.KHR_reusable_sync); + assert(disp->Extensions.KHR_reusable_sync || + disp->Extensions.KHR_fence_sync); + + if (s->SyncStatus == EGL_SIGNALED_KHR) + RETURN_EGL_EVAL(disp, EGL_CONDITION_SATISFIED_KHR); + ret = drv->API.ClientWaitSyncKHR(drv, disp, s, flags, timeout); RETURN_EGL_EVAL(disp, ret); @@ -1731,7 +1754,8 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *v EGLBoolean ret; _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); - assert(disp->Extensions.KHR_reusable_sync); + assert(disp->Extensions.KHR_reusable_sync || + disp->Extensions.KHR_fence_sync); ret = drv->API.GetSyncAttribKHR(drv, disp, s, attribute, value); RETURN_EGL_EVAL(disp, ret); From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): egl/dri2: implement EGL_KHR_wait_sync Message-ID: <20150430124315.C6D72761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a8617cc0428dcc8340cb85776e122c6534a0fa11 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a8617cc0428dcc8340cb85776e122c6534a0fa11 Author: Marek Olšák Date: Fri Apr 10 12:04:18 2015 +0200 egl/dri2: implement EGL_KHR_wait_sync --- src/egl/drivers/dri2/egl_dri2.c | 15 +++++++++++++++ src/egl/main/eglapi.c | 28 ++++++++++++++++++++++++++++ src/egl/main/eglapi.h | 2 ++ src/egl/main/egldisplay.h | 1 + src/egl/main/eglfallbacks.c | 1 + 5 files changed,
47 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index 97175ae..e096a7d 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -530,6 +530,7 @@ dri2_setup_screen(_EGLDisplay *disp) if (dri2_dpy->fence) { disp->Extensions.KHR_fence_sync = EGL_TRUE; + disp->Extensions.KHR_wait_sync = EGL_TRUE; } if (dri2_dpy->image) { @@ -2272,6 +2273,19 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, return ret; } +static EGLint +dri2_server_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync) +{ + _EGLContext *ctx = _eglGetCurrentContext(); + struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); + struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx); + struct dri2_egl_sync *dri2_sync = dri2_egl_sync(sync); + + dri2_dpy->fence->server_wait_sync(dri2_ctx->dri_context, + dri2_sync->fence, 0); + return EGL_TRUE; +} + static void dri2_unload(_EGLDriver *drv) { @@ -2386,6 +2400,7 @@ _eglBuiltInDriverDRI2(const char *args) dri2_drv->base.API.GetSyncValuesCHROMIUM = dri2_get_sync_values_chromium; dri2_drv->base.API.CreateSyncKHR = dri2_create_sync; dri2_drv->base.API.ClientWaitSyncKHR = dri2_client_wait_sync; + dri2_drv->base.API.WaitSyncKHR = dri2_server_wait_sync; dri2_drv->base.API.DestroySyncKHR = dri2_destroy_sync; dri2_drv->base.Name = "DRI2"; diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index dd972b2..65a730a 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -406,6 +406,7 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(KHR_reusable_sync); _EGL_CHECK_EXTENSION(KHR_fence_sync); + _EGL_CHECK_EXTENSION(KHR_wait_sync); _EGL_CHECK_EXTENSION(KHR_surfaceless_context); _EGL_CHECK_EXTENSION(KHR_create_context); @@ -1216,6 +1217,7 @@ eglGetProcAddress(const char *procname) { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR }, { "eglClientWaitSyncKHR", (_EGLProc) 
eglClientWaitSyncKHR }, + { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, { "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR }, { "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR }, #ifdef EGL_NOK_swap_region @@ -1729,6 +1731,32 @@ eglClientWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR t } +EGLint EGLAPIENTRY +eglWaitSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLint flags) +{ + _EGLDisplay *disp = _eglLockDisplay(dpy); + _EGLSync *s = _eglLookupSync(sync, disp); + _EGLContext *ctx = _eglGetCurrentContext(); + _EGLDriver *drv; + EGLint ret; + + _EGL_CHECK_SYNC(disp, s, EGL_FALSE, drv); + assert(disp->Extensions.KHR_wait_sync); + + /* return an error if the client API doesn't support GL_OES_EGL_sync */ + if (ctx == EGL_NO_CONTEXT || ctx->ClientAPI != EGL_OPENGL_ES_API) + RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE); + + /* the API doesn't allow any flags yet */ + if (flags != 0) + RETURN_EGL_ERROR(disp, EGL_BAD_PARAMETER, EGL_FALSE); + + ret = drv->API.WaitSyncKHR(drv, disp, s); + + RETURN_EGL_EVAL(disp, ret); +} + + EGLBoolean EGLAPIENTRY eglSignalSyncKHR(EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode) { diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 3245327..7462b35 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -105,6 +105,7 @@ typedef EGLBoolean (*DestroyImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLI typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list); typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLenum mode); typedef EGLBoolean (*GetSyncAttribKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync 
*sync, EGLint attribute, EGLint *value); @@ -201,6 +202,7 @@ struct _egl_api CreateSyncKHR_t CreateSyncKHR; DestroySyncKHR_t DestroySyncKHR; ClientWaitSyncKHR_t ClientWaitSyncKHR; + WaitSyncKHR_t WaitSyncKHR; SignalSyncKHR_t SignalSyncKHR; GetSyncAttribKHR_t GetSyncAttribKHR; diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 4a1fb4a..70381bc 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -106,6 +106,7 @@ struct _egl_extensions EGLBoolean KHR_reusable_sync; EGLBoolean KHR_fence_sync; + EGLBoolean KHR_wait_sync; EGLBoolean KHR_surfaceless_context; EGLBoolean KHR_create_context; diff --git a/src/egl/main/eglfallbacks.c b/src/egl/main/eglfallbacks.c index c108ca7..83d7756 100644 --- a/src/egl/main/eglfallbacks.c +++ b/src/egl/main/eglfallbacks.c @@ -107,6 +107,7 @@ _eglInitDriverFallbacks(_EGLDriver *drv) drv->API.CreateSyncKHR = NULL; drv->API.DestroySyncKHR = NULL; drv->API.ClientWaitSyncKHR = NULL; + drv->API.WaitSyncKHR = NULL; drv->API.SignalSyncKHR = NULL; drv->API.GetSyncAttribKHR = _eglGetSyncAttribKHR; From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): egl/dri2: implement EGL_KHR_cl_event2 (v2) Message-ID: <20150430124315.D3E3E761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 290a3eb7507f7f2949753a77c425ed2bb6fd0dd1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=290a3eb7507f7f2949753a77c425ed2bb6fd0dd1 Author: Marek Olšák Date: Fri Apr 10 13:16:30 2015 +0200 egl/dri2: implement EGL_KHR_cl_event2 (v2) v2: fix the SYNC_CONDITION query --- src/egl/drivers/dri2/egl_dri2.c | 25 ++++++++++++++-- src/egl/main/eglapi.c | 30 +++++++++++++++++-- src/egl/main/eglapi.h | 2 +- src/egl/main/egldisplay.h | 1 + src/egl/main/eglsync.c | 62 +++++++++++++++++++++++++++++++++++---- src/egl/main/eglsync.h | 3 +- 6 files changed, 111 insertions(+), 12 deletions(-) diff --git
a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index e096a7d..34d6bfe 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -531,6 +531,8 @@ dri2_setup_screen(_EGLDisplay *disp) if (dri2_dpy->fence) { disp->Extensions.KHR_fence_sync = EGL_TRUE; disp->Extensions.KHR_wait_sync = EGL_TRUE; + if (dri2_dpy->fence->get_fence_from_cl_event) + disp->Extensions.KHR_cl_event2 = EGL_TRUE; } if (dri2_dpy->image) { @@ -2207,7 +2209,8 @@ dri2_egl_unref_sync(struct dri2_egl_display *dri2_dpy, static _EGLSync * dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, - EGLenum type, const EGLint *attrib_list) + EGLenum type, const EGLint *attrib_list, + const EGLAttribKHR *attrib_list64) { _EGLContext *ctx = _eglGetCurrentContext(); struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy); @@ -2220,7 +2223,8 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, return NULL; } - if (!_eglInitSync(&dri2_sync->base, dpy, type, attrib_list)) { + if (!_eglInitSync(&dri2_sync->base, dpy, type, attrib_list, + attrib_list64)) { free(dri2_sync); return NULL; } @@ -2229,6 +2233,23 @@ dri2_create_sync(_EGLDriver *drv, _EGLDisplay *dpy, case EGL_SYNC_FENCE_KHR: dri2_sync->fence = dri2_dpy->fence->create_fence(dri2_ctx->dri_context); break; + + case EGL_SYNC_CL_EVENT_KHR: + dri2_sync->fence = dri2_dpy->fence->get_fence_from_cl_event( + dri2_dpy->dri_screen, + dri2_sync->base.CLEvent); + /* this can only happen if the cl_event passed in is invalid. 
*/ + if (!dri2_sync->fence) { + _eglError(EGL_BAD_ATTRIBUTE, "eglCreateSyncKHR"); + free(dri2_sync); + return NULL; + } + + /* the initial status must be "signaled" if the cl_event is signaled */ + if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context, + dri2_sync->fence, 0, 0)) + dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR; + break; } p_atomic_set(&dri2_sync->refcount, 1); diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index 65a730a..ec41aa3 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -407,6 +407,7 @@ _eglCreateExtensionsString(_EGLDisplay *dpy) _EGL_CHECK_EXTENSION(KHR_reusable_sync); _EGL_CHECK_EXTENSION(KHR_fence_sync); _EGL_CHECK_EXTENSION(KHR_wait_sync); + _EGL_CHECK_EXTENSION(KHR_cl_event2); _EGL_CHECK_EXTENSION(KHR_surfaceless_context); _EGL_CHECK_EXTENSION(KHR_create_context); @@ -1215,6 +1216,7 @@ eglGetProcAddress(const char *procname) { "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR }, { "eglDestroyImageKHR", (_EGLProc) eglDestroyImageKHR }, { "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR }, + { "eglCreateSync64KHR", (_EGLProc) eglCreateSync64KHR }, { "eglDestroySyncKHR", (_EGLProc) eglDestroySyncKHR }, { "eglClientWaitSyncKHR", (_EGLProc) eglClientWaitSyncKHR }, { "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR }, @@ -1655,8 +1657,9 @@ eglDestroyImageKHR(EGLDisplay dpy, EGLImageKHR image) } -EGLSyncKHR EGLAPIENTRY -eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) +static EGLSyncKHR +_eglCreateSync(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list, + const EGLAttribKHR *attrib_list64, EGLBoolean is64) { _EGLDisplay *disp = _eglLockDisplay(dpy); _EGLContext *ctx = _eglGetCurrentContext(); @@ -1666,6 +1669,9 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) _EGL_CHECK_DISPLAY(disp, EGL_NO_SYNC_KHR, drv); + if (!disp->Extensions.KHR_cl_event2 && is64) + RETURN_EGL_EVAL(disp, EGL_NO_SYNC_KHR); + /* return an error if the client API doesn't support 
GL_OES_EGL_sync */ if (!ctx || ctx->Resource.Display != dpy || ctx->ClientAPI != EGL_OPENGL_ES_API) @@ -1680,17 +1686,35 @@ eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) if (!disp->Extensions.KHR_reusable_sync) RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); break; + case EGL_SYNC_CL_EVENT_KHR: + if (!disp->Extensions.KHR_cl_event2) + RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); + break; default: RETURN_EGL_ERROR(disp, EGL_BAD_ATTRIBUTE, EGL_NO_SYNC_KHR); } - sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list); + sync = drv->API.CreateSyncKHR(drv, disp, type, attrib_list, attrib_list64); ret = (sync) ? _eglLinkSync(sync) : EGL_NO_SYNC_KHR; RETURN_EGL_EVAL(disp, ret); } +EGLSyncKHR EGLAPIENTRY +eglCreateSyncKHR(EGLDisplay dpy, EGLenum type, const EGLint *attrib_list) +{ + return _eglCreateSync(dpy, type, attrib_list, NULL, EGL_FALSE); +} + + +EGLSyncKHR EGLAPIENTRY +eglCreateSync64KHR(EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list) +{ + return _eglCreateSync(dpy, type, NULL, attrib_list, EGL_TRUE); +} + + EGLBoolean EGLAPIENTRY eglDestroySyncKHR(EGLDisplay dpy, EGLSyncKHR sync) { diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 7462b35..5226886 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -102,7 +102,7 @@ typedef _EGLImage *(*CreateImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLCo typedef EGLBoolean (*DestroyImageKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLImage *image); -typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list); +typedef _EGLSync *(*CreateSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, EGLenum type, const EGLint *attrib_list, const EGLAttribKHR *attrib_list64); typedef EGLBoolean (*DestroySyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); typedef EGLint (*ClientWaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint flags, EGLTimeKHR timeout); 
typedef EGLint (*WaitSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync); diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 70381bc..b6b9ed8 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -107,6 +107,7 @@ struct _egl_extensions EGLBoolean KHR_reusable_sync; EGLBoolean KHR_fence_sync; EGLBoolean KHR_wait_sync; + EGLBoolean KHR_cl_event2; EGLBoolean KHR_surfaceless_context; EGLBoolean KHR_create_context; diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 9d0067c..3b4a889 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -65,25 +65,76 @@ _eglParseSyncAttribList(_EGLSync *sync, const EGLint *attrib_list) } +static EGLint +_eglParseSyncAttribList64(_EGLSync *sync, const EGLAttribKHR *attrib_list) +{ + EGLint i, err = EGL_SUCCESS; + + if (!attrib_list) + return EGL_SUCCESS; + + for (i = 0; attrib_list[i] != EGL_NONE; i++) { + EGLint attr = attrib_list[i++]; + EGLint val = attrib_list[i]; + + switch (attr) { + case EGL_CL_EVENT_HANDLE_KHR: + if (sync->Type == EGL_SYNC_CL_EVENT_KHR) { + sync->CLEvent = val; + break; + } + /* fall through */ + default: + (void) val; + err = EGL_BAD_ATTRIBUTE; + break; + } + + if (err != EGL_SUCCESS) { + _eglLog(_EGL_DEBUG, "bad sync attribute 0x%04x", attr); + break; + } + } + + return err; +} + + EGLBoolean _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, - const EGLint *attrib_list) + const EGLint *attrib_list, const EGLAttribKHR *attrib_list64) { EGLint err; if (!(type == EGL_SYNC_REUSABLE_KHR && dpy->Extensions.KHR_reusable_sync) && - !(type == EGL_SYNC_FENCE_KHR && dpy->Extensions.KHR_fence_sync)) + !(type == EGL_SYNC_FENCE_KHR && dpy->Extensions.KHR_fence_sync) && + !(type == EGL_SYNC_CL_EVENT_KHR && dpy->Extensions.KHR_cl_event2 && + attrib_list64)) return _eglError(EGL_BAD_ATTRIBUTE, "eglCreateSyncKHR"); _eglInitResource(&sync->Resource, sizeof(*sync), dpy); sync->Type = type; sync->SyncStatus = EGL_UNSIGNALED_KHR; - 
sync->SyncCondition = EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR; - err = _eglParseSyncAttribList(sync, attrib_list); + switch (type) { + case EGL_SYNC_CL_EVENT_KHR: + sync->SyncCondition = EGL_SYNC_CL_EVENT_COMPLETE_KHR; + break; + default: + sync->SyncCondition = EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR; + } + + if (attrib_list64) + err = _eglParseSyncAttribList64(sync, attrib_list64); + else + err = _eglParseSyncAttribList(sync, attrib_list); + if (err != EGL_SUCCESS) return _eglError(err, "eglCreateSyncKHR"); + if (type == EGL_SYNC_CL_EVENT_KHR && !sync->CLEvent) + return _eglError(EGL_BAD_ATTRIBUTE, "eglCreateSyncKHR"); + return EGL_TRUE; } @@ -103,7 +154,8 @@ _eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, *value = sync->SyncStatus; break; case EGL_SYNC_CONDITION_KHR: - if (sync->Type != EGL_SYNC_FENCE_KHR) + if (sync->Type != EGL_SYNC_FENCE_KHR && + sync->Type != EGL_SYNC_CL_EVENT_KHR) return _eglError(EGL_BAD_ATTRIBUTE, "eglGetSyncAttribKHR"); *value = sync->SyncCondition; break; diff --git a/src/egl/main/eglsync.h b/src/egl/main/eglsync.h index c6cf8c6..1d2eb11 100644 --- a/src/egl/main/eglsync.h +++ b/src/egl/main/eglsync.h @@ -47,12 +47,13 @@ struct _egl_sync EGLenum Type; EGLenum SyncStatus; EGLenum SyncCondition; + EGLAttribKHR CLEvent; }; extern EGLBoolean _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type, - const EGLint *attrib_list); + const EGLint *attrib_list, const EGLAttribKHR *attrib_list64); extern EGLBoolean From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): egl/dri2: return the latest sync status in eglGetSyncAttribKHR Message-ID: <20150430124315.DD583761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: a2557b30d843d4f3dada6a022f27e8343adabca8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2557b30d843d4f3dada6a022f27e8343adabca8 Author: Marek Ol??k Date: Fri Apr 10 
23:35:39 2015 +0200 egl/dri2: return the latest sync status in eglGetSyncAttribKHR --- src/egl/main/eglsync.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/egl/main/eglsync.c b/src/egl/main/eglsync.c index 3b4a889..8b8ab16 100644 --- a/src/egl/main/eglsync.c +++ b/src/egl/main/eglsync.c @@ -30,6 +30,7 @@ #include "eglsync.h" #include "eglcurrent.h" +#include "egldriver.h" #include "egllog.h" @@ -144,13 +145,19 @@ _eglGetSyncAttribKHR(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLint *value) { if (!value) - return _eglError(EGL_BAD_PARAMETER, "eglGetConfigs"); + return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR"); switch (attribute) { case EGL_SYNC_TYPE_KHR: *value = sync->Type; break; case EGL_SYNC_STATUS_KHR: + /* update the sync status */ + if (sync->SyncStatus != EGL_SIGNALED_KHR && + (sync->Type == EGL_SYNC_FENCE_KHR || + sync->Type == EGL_SYNC_CL_EVENT_KHR)) + drv->API.ClientWaitSyncKHR(drv, dpy, sync, 0, 0); + *value = sync->SyncStatus; break; case EGL_SYNC_CONDITION_KHR: From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): gallium,clover: add OpenCL interoperability support for CL events Message-ID: <20150430124315.F2CA0761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 952b5e84db47be3adaf01f047efeedd26cf0d173 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=952b5e84db47be3adaf01f047efeedd26cf0d173 Author: Marek Ol??k Date: Fri Apr 10 18:42:42 2015 +0200 gallium,clover: add OpenCL interoperability support for CL events v2: - move interop.cpp to clover/api - change intptr_t to void* in the interface - add a virtual function fence() to simplify some code v3: - use bool in the interface v4: - enclose the last two interop functions in try..catch Reviewed-by: Francisco Jerez --- src/gallium/include/state_tracker/opencl_interop.h | 40 ++++++++++++ 
src/gallium/state_trackers/clover/Makefile.sources | 1 + src/gallium/state_trackers/clover/api/interop.cpp | 64 ++++++++++++++++++++ src/gallium/state_trackers/clover/core/event.hpp | 8 +++ src/gallium/targets/opencl/opencl.sym | 1 + 5 files changed, 114 insertions(+) diff --git a/src/gallium/include/state_tracker/opencl_interop.h b/src/gallium/include/state_tracker/opencl_interop.h new file mode 100644 index 0000000..4983644 --- /dev/null +++ b/src/gallium/include/state_tracker/opencl_interop.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2015 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef OPENCL_INTEROP_H +#define OPENCL_INTEROP_H + +/* dlsym these without the "_t" suffix. 
You should get the correct symbols + * if the OpenCL driver is loaded. + */ + +typedef bool (*opencl_dri_event_add_ref_t)(void *cl_event); +typedef bool (*opencl_dri_event_release_t)(void *cl_event); +typedef bool (*opencl_dri_event_wait_t)(void *cl_event, uint64_t timeout); +typedef struct pipe_fence_handle *(*opencl_dri_event_get_fence_t)(void *cl_event); + +#endif /* OPENCL_INTEROP_H */ diff --git a/src/gallium/state_trackers/clover/Makefile.sources b/src/gallium/state_trackers/clover/Makefile.sources index fa96774..10bbda0 100644 --- a/src/gallium/state_trackers/clover/Makefile.sources +++ b/src/gallium/state_trackers/clover/Makefile.sources @@ -4,6 +4,7 @@ CPP_SOURCES := \ api/dispatch.cpp \ api/dispatch.hpp \ api/event.cpp \ + api/interop.cpp \ api/kernel.cpp \ api/memory.cpp \ api/platform.cpp \ diff --git a/src/gallium/state_trackers/clover/api/interop.cpp b/src/gallium/state_trackers/clover/api/interop.cpp new file mode 100644 index 0000000..ea0c7c7 --- /dev/null +++ b/src/gallium/state_trackers/clover/api/interop.cpp @@ -0,0 +1,64 @@ +// +// Copyright 2015 Advanced Micro Devices, Inc. +// All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +#include "core/event.hpp" +#include "api/util.hpp" + +using namespace clover; + +extern "C" { + +PUBLIC bool +opencl_dri_event_add_ref(cl_event event) +{ + return clRetainEvent(event) == CL_SUCCESS; +} + +PUBLIC bool +opencl_dri_event_release(cl_event event) +{ + return clReleaseEvent(event) == CL_SUCCESS; +} + +PUBLIC bool +opencl_dri_event_wait(cl_event event, uint64_t timeout) try { + if (!timeout) { + return obj(event).status() == CL_COMPLETE; + } + + obj(event).wait(); + return true; + +} catch (error &) { + return false; +} + +PUBLIC struct pipe_fence_handle * +opencl_dri_event_get_fence(cl_event event) try { + return obj(event).fence(); + +} catch (error &) { + return NULL; +} + +} diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp index 0e1359a..d407c80 100644 --- a/src/gallium/state_trackers/clover/core/event.hpp +++ b/src/gallium/state_trackers/clover/core/event.hpp @@ -70,6 +70,10 @@ namespace clover { virtual cl_command_type command() const = 0; virtual void wait() const = 0; + virtual struct pipe_fence_handle *fence() const { + return NULL; + } + const intrusive_ref context; protected: @@ -116,6 +120,10 @@ namespace clover { friend class command_queue; + virtual struct pipe_fence_handle *fence() const { + return _fence; + } + private: virtual void fence(pipe_fence_handle *fence); action profile(command_queue &q, const action &action) const; diff --git a/src/gallium/targets/opencl/opencl.sym b/src/gallium/targets/opencl/opencl.sym index ee8aacf..9fcc576 100644 --- a/src/gallium/targets/opencl/opencl.sym +++ b/src/gallium/targets/opencl/opencl.sym @@ -1,6 +1,7 @@ { global: cl*; + opencl_dri_*; local: *; }; From 
mareko at kemper.freedesktop.org Thu Apr 30 12:43:16 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:16 -0700 (PDT) Subject: Mesa (master): docs/relnotes: document the new EGL sync extensions Message-ID: <20150430124316.12DAA761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1db5d3c19ee8d91686dfe3c1927a3e2dfa83830c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1db5d3c19ee8d91686dfe3c1927a3e2dfa83830c Author: Marek Olšák Date: Wed Apr 29 20:33:21 2015 +0200 docs/relnotes: document the new EGL sync extensions --- docs/relnotes/10.6.0.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html index ff24842..fbc4d5b 100644 --- a/docs/relnotes/10.6.0.html +++ b/docs/relnotes/10.6.0.html @@ -58,6 +58,10 @@ Note: some of the new features are only available with certain drivers.
    • GL_ARB_texture_stencil8 on nv50, nvc0, r600, radeonsi, softpipe
    • GL_ARB_uniform_buffer_object on freedreno
    • GL_EXT_draw_buffers2 on freedreno
    • +
    • GL_OES_EGL_sync on all drivers
    • +
    • EGL_KHR_fence_sync on freedreno, nv50, nvc0, r600, radeonsi
    • +
    • EGL_KHR_wait_sync on freedreno, nv50, nvc0, r600, radeonsi
    • +
    • EGL_KHR_cl_event2 on freedreno, nv50, nvc0, r600, radeonsi

    Bug fixes

    From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): egl/dri: don't expose configs with an accumulation buffer Message-ID: <20150430124315.9806C761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 396cbabbefaae64deac6d33c79898bb07db8a621 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=396cbabbefaae64deac6d33c79898bb07db8a621 Author: Marek Olšák Date: Thu Apr 9 23:25:07 2015 +0200 egl/dri: don't expose configs with an accumulation buffer --- src/egl/drivers/dri2/egl_dri2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c index a428f28..14b9be9 100644 --- a/src/egl/drivers/dri2/egl_dri2.c +++ b/src/egl/drivers/dri2/egl_dri2.c @@ -194,6 +194,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id, dri_masks[3] = value; break; + case __DRI_ATTRIB_ACCUM_RED_SIZE: + case __DRI_ATTRIB_ACCUM_GREEN_SIZE: + case __DRI_ATTRIB_ACCUM_BLUE_SIZE: + case __DRI_ATTRIB_ACCUM_ALPHA_SIZE: + /* Don't expose visuals with the accumulation buffer.
*/ + if (value > 0) + return NULL; + break; + default: key = dri2_to_egl_attribute_map[attrib]; if (key != 0) From mareko at kemper.freedesktop.org Thu Apr 30 12:43:16 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:16 -0700 (PDT) Subject: Mesa (master): st/dri: implement the fence interface for CL events Message-ID: <20150430124316.09900761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: e70de9b0327009dd9b99ee1f388cfffd1bc93761 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e70de9b0327009dd9b99ee1f388cfffd1bc93761 Author: Marek Ol??k Date: Fri Apr 10 19:45:50 2015 +0200 st/dri: implement the fence interface for CL events --- src/gallium/state_trackers/dri/dri2.c | 72 ++++++++++++++++++++++++++- src/gallium/state_trackers/dri/dri_screen.c | 1 + src/gallium/state_trackers/dri/dri_screen.h | 9 ++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index 9b56313..8b6fe67 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -29,6 +29,7 @@ */ #include +#include #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" @@ -1252,8 +1253,48 @@ static __DRIimageExtension dri2ImageExtension = { }; +static bool +dri2_is_opencl_interop_loaded_locked(struct dri_screen *screen) +{ + return screen->opencl_dri_event_add_ref && + screen->opencl_dri_event_release && + screen->opencl_dri_event_wait && + screen->opencl_dri_event_get_fence; +} + +static bool +dri2_load_opencl_interop(struct dri_screen *screen) +{ +#if defined(RTLD_DEFAULT) + bool success; + + pipe_mutex_lock(screen->opencl_func_mutex); + + if (dri2_is_opencl_interop_loaded_locked(screen)) { + pipe_mutex_unlock(screen->opencl_func_mutex); + return true; + } + + screen->opencl_dri_event_add_ref = + dlsym(RTLD_DEFAULT, "opencl_dri_event_add_ref"); + screen->opencl_dri_event_release = + dlsym(RTLD_DEFAULT, 
"opencl_dri_event_release"); + screen->opencl_dri_event_wait = + dlsym(RTLD_DEFAULT, "opencl_dri_event_wait"); + screen->opencl_dri_event_get_fence = + dlsym(RTLD_DEFAULT, "opencl_dri_event_get_fence"); + + success = dri2_is_opencl_interop_loaded_locked(screen); + pipe_mutex_unlock(screen->opencl_func_mutex); + return success; +#else + return false; +#endif +} + struct dri2_fence { struct pipe_fence_handle *pipe_fence; + void *cl_event; }; static void * @@ -1278,7 +1319,24 @@ dri2_create_fence(__DRIcontext *_ctx) static void * dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event) { - return NULL; + struct dri_screen *driscreen = dri_screen(_screen); + struct dri2_fence *fence; + + if (!dri2_load_opencl_interop(driscreen)) + return NULL; + + fence = CALLOC_STRUCT(dri2_fence); + if (!fence) + return NULL; + + fence->cl_event = (void*)cl_event; + + if (!driscreen->opencl_dri_event_add_ref(fence->cl_event)) { + free(fence); + return NULL; + } + + return fence; } static void @@ -1290,6 +1348,8 @@ dri2_destroy_fence(__DRIscreen *_screen, void *_fence) if (fence->pipe_fence) screen->fence_reference(screen, &fence->pipe_fence, NULL); + else if (fence->cl_event) + driscreen->opencl_dri_event_release(fence->cl_event); else assert(0); @@ -1308,6 +1368,15 @@ dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags, if (fence->pipe_fence) return screen->fence_finish(screen, fence->pipe_fence, timeout); + else if (fence->cl_event) { + struct pipe_fence_handle *pipe_fence = + driscreen->opencl_dri_event_get_fence(fence->cl_event); + + if (pipe_fence) + return screen->fence_finish(screen, pipe_fence, timeout); + else + return driscreen->opencl_dri_event_wait(fence->cl_event, timeout); + } else { assert(0); return false; @@ -1365,6 +1434,7 @@ dri2_init_screen(__DRIscreen * sPriv) screen->sPriv = sPriv; screen->fd = sPriv->fd; + pipe_mutex_init(screen->opencl_func_mutex); sPriv->driverPrivate = (void *)screen; diff --git 
a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c index 1b14ab1..85393d8 100644 --- a/src/gallium/state_trackers/dri/dri_screen.c +++ b/src/gallium/state_trackers/dri/dri_screen.c @@ -365,6 +365,7 @@ dri_destroy_screen_helper(struct dri_screen * screen) screen->base.screen->destroy(screen->base.screen); dri_destroy_option_cache(screen); + pipe_mutex_destroy(screen->opencl_func_mutex); } void diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h index e4a1a39..bdab74f 100644 --- a/src/gallium/state_trackers/dri/dri_screen.h +++ b/src/gallium/state_trackers/dri/dri_screen.h @@ -39,6 +39,8 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "state_tracker/st_api.h" +#include "state_tracker/opencl_interop.h" +#include "os/os_thread.h" #include "postprocess/filters.h" struct dri_context; @@ -84,6 +86,13 @@ struct dri_screen /* hooks filled in by dri2 & drisw */ __DRIimage * (*lookup_egl_image)(struct dri_screen *ctx, void *handle); + + /* OpenCL interop */ + pipe_mutex opencl_func_mutex; + opencl_dri_event_add_ref_t opencl_dri_event_add_ref; + opencl_dri_event_release_t opencl_dri_event_release; + opencl_dri_event_wait_t opencl_dri_event_wait; + opencl_dri_event_get_fence_t opencl_dri_event_get_fence; }; /** cast wrapper */ From mareko at kemper.freedesktop.org Thu Apr 30 12:43:15 2015 From: mareko at kemper.freedesktop.org (Marek Olšák) Date: Thu, 30 Apr 2015 05:43:15 -0700 (PDT) Subject: Mesa (master): st/dri: implement the fence interface Message-ID: <20150430124315.E7B4B761E9@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 7070b0dd66d315b3106803580623df4fb5223473 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7070b0dd66d315b3106803580623df4fb5223473 Author: Marek Ol??k Date: Fri Apr 10 19:41:39 2015 +0200 st/dri: implement the fence interface --- src/gallium/state_trackers/dri/dri2.c | 80 +++++++++++++++++++++++++++++++++ 1 file changed, 80 
insertions(+) diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c index f8f4ecf..9b56313 100644 --- a/src/gallium/state_trackers/dri/dri2.c +++ b/src/gallium/state_trackers/dri/dri2.c @@ -1251,6 +1251,85 @@ static __DRIimageExtension dri2ImageExtension = { .getCapabilities = dri2_get_capabilities, }; + +struct dri2_fence { + struct pipe_fence_handle *pipe_fence; +}; + +static void * +dri2_create_fence(__DRIcontext *_ctx) +{ + struct pipe_context *ctx = dri_context(_ctx)->st->pipe; + struct dri2_fence *fence = CALLOC_STRUCT(dri2_fence); + + if (!fence) + return NULL; + + ctx->flush(ctx, &fence->pipe_fence, 0); + + if (!fence->pipe_fence) { + FREE(fence); + return NULL; + } + + return fence; +} + +static void * +dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event) +{ + return NULL; +} + +static void +dri2_destroy_fence(__DRIscreen *_screen, void *_fence) +{ + struct dri_screen *driscreen = dri_screen(_screen); + struct pipe_screen *screen = driscreen->base.screen; + struct dri2_fence *fence = (struct dri2_fence*)_fence; + + if (fence->pipe_fence) + screen->fence_reference(screen, &fence->pipe_fence, NULL); + else + assert(0); + + FREE(fence); +} + +static GLboolean +dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags, + uint64_t timeout) +{ + struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv); + struct pipe_screen *screen = driscreen->base.screen; + struct dri2_fence *fence = (struct dri2_fence*)_fence; + + /* No need to flush. The context was flushed when the fence was created. */ + + if (fence->pipe_fence) + return screen->fence_finish(screen, fence->pipe_fence, timeout); + else { + assert(0); + return false; + } +} + +static void +dri2_server_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags) +{ + /* AFAIK, no driver currently supports parallel context execution. 
*/ +} + +static __DRI2fenceExtension dri2FenceExtension = { + .base = { __DRI2_FENCE, 1 }, + + .create_fence = dri2_create_fence, + .get_fence_from_cl_event = dri2_get_fence_from_cl_event, + .destroy_fence = dri2_destroy_fence, + .client_wait_sync = dri2_client_wait_sync, + .server_wait_sync = dri2_server_wait_sync +}; + /* * Backend function init_screen. */ @@ -1262,6 +1341,7 @@ static const __DRIextension *dri_screen_extensions[] = { &dri2RendererQueryExtension.base, &dri2ConfigQueryExtension.base, &dri2ThrottleExtension.base, + &dri2FenceExtension.base, NULL }; From kwg at kemper.freedesktop.org Thu Apr 30 18:51:51 2015 From: kwg at kemper.freedesktop.org (Kenneth Graunke) Date: Thu, 30 Apr 2015 11:51:51 -0700 (PDT) Subject: Mesa (master): i965: Unhardcode a few more stage names and abbreviations. Message-ID: <20150430185151.C437176102@kemper.freedesktop.org> Module: Mesa Branch: master Commit: 1ac7db07b363207e8ded9259f84bbcaa084b8667 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1ac7db07b363207e8ded9259f84bbcaa084b8667 Author: Kenneth Graunke Date: Thu Mar 12 05:37:43 2015 -0700 i965: Unhardcode a few more stage names and abbreviations. The stage_abbrev and stage_name fields in backend_visitor provide what we need without any additional effort. It also means we'll get the right names for compute shaders, SIMD8 geometry shaders, and both kinds of tessellation shaders. This does unfortunately change the capitalization of the stage abbreviation in the INTEL_DEBUG=optimizer output filenames. It doesn't seem worth adding code to handle, though. 
Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Anuj Phogat --- src/mesa/drivers/dri/i965/brw_fs.cpp | 10 +++------- src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 ++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 255ddf4..a648323 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3766,8 +3766,6 @@ fs_visitor::calculate_register_pressure() void fs_visitor::optimize() { - const char *stage_name = stage == MESA_SHADER_VERTEX ? "vs" : "fs"; - split_virtual_grfs(); move_uniform_array_access_to_pull_constants(); @@ -3781,7 +3779,7 @@ fs_visitor::optimize() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ char filename[64]; \ snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \ - stage_name, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ + stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ backend_visitor::dump_instructions(filename); \ } \ @@ -3793,7 +3791,8 @@ fs_visitor::optimize() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; snprintf(filename, 64, "%s%d-%04d-00-start", - stage_name, dispatch_width, shader_prog ? shader_prog->Name : 0); + stage_abbrev, dispatch_width, + shader_prog ? shader_prog->Name : 0); backend_visitor::dump_instructions(filename); } @@ -3885,9 +3884,6 @@ fs_visitor::allocate_registers() } if (!allocated_without_spills) { - const char *stage_name = stage == MESA_SHADER_VERTEX ? - "Vertex" : "Fragment"; - /* We assume that any spilling is worse than just dropping back to * SIMD8. There's probably actually some intermediate point where * SIMD16 with a couple of spills is still better. 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 8ce07637..57b507d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1700,8 +1700,6 @@ vec4_visitor::run() move_push_constants_to_pull_constants(); split_virtual_grfs(); - const char *stage_name = stage == MESA_SHADER_GEOMETRY ? "gs" : "vs"; - #define OPT(pass, args...) ({ \ pass_num++; \ bool this_progress = pass(args); \ @@ -1709,7 +1707,7 @@ vec4_visitor::run() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ char filename[64]; \ snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ - stage_name, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ + stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ \ backend_visitor::dump_instructions(filename); \ } \ @@ -1722,7 +1720,7 @@ vec4_visitor::run() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; snprintf(filename, 64, "%s-%04d-00-start", - stage_name, shader_prog ? shader_prog->Name : 0); + stage_abbrev, shader_prog ? shader_prog->Name : 0); backend_visitor::dump_instructions(filename); }