[Mesa-dev] Fwd: [PATCH 5/6] st/mesa: implement blit-based ReadPixels
Martin Andersson
g02maran at gmail.com
Thu Mar 14 15:35:46 PDT 2013
Thanks for implementing this, I just have one comment.
On Thu, Mar 14, 2013 at 7:45 PM, Marek Olšák <maraeo at gmail.com> wrote:
> Initial version contributed by: Martin Andersson <g02maran at gmail.com>
>
> This is only used if the memcpy path cannot be used and if no transfer ops
> are needed. It's pretty similar to our TexImage and GetTexImage
> implementations.
>
> The motivation behind this is to be able to use ReadPixels every frame and
> still have at least 20 fps (or 60 fps with a powerful GPU and CPU)
> instead of 0.5 fps.
> ---
> src/mesa/state_tracker/st_cb_readpixels.c | 184 +++++++++++++++++++++++++++--
> src/mesa/state_tracker/st_cb_texture.c | 12 +-
> src/mesa/state_tracker/st_cb_texture.h | 6 +
> 3 files changed, 189 insertions(+), 13 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
> index 6b824b1..b524738 100644
> --- a/src/mesa/state_tracker/st_cb_readpixels.c
> +++ b/src/mesa/state_tracker/st_cb_readpixels.c
> @@ -25,35 +25,205 @@
> *
> **************************************************************************/
>
> -
> +#include "main/image.h"
> +#include "main/pbo.h"
> #include "main/imports.h"
> #include "main/readpix.h"
> +#include "main/enums.h"
> +#include "main/framebuffer.h"
> +#include "util/u_inlines.h"
> +#include "util/u_format.h"
>
> +#include "st_cb_fbo.h"
> #include "st_atom.h"
> #include "st_context.h"
> #include "st_cb_bitmap.h"
> #include "st_cb_readpixels.h"
> +#include "state_tracker/st_cb_texture.h"
> +#include "state_tracker/st_format.h"
> +#include "state_tracker/st_texture.h"
>
>
> /**
> - * The only special thing we need to do for the state tracker's
> - * glReadPixels is to validate state (to be sure we have up-to-date
> - * framebuffer surfaces) and flush the bitmap cache prior to reading.
> + * This uses a blit to copy the read buffer to a texture format which matches
> + * the format and type combo and then a fast read-back is done using memcpy.
> + * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
> + * a format which matches the swizzling.
> + *
> + * If such a format isn't available, we fall back to _mesa_readpixels.
> + *
> + * NOTE: Some drivers use a blit to convert between tiled and linear
> + * texture layouts during texture uploads/downloads, so the blit
> + * we do here should be free in such cases.
> */
> static void
> st_readpixels(struct gl_context *ctx, GLint x, GLint y,
> GLsizei width, GLsizei height,
> GLenum format, GLenum type,
> const struct gl_pixelstore_attrib *pack,
> - GLvoid *dest)
> + GLvoid *pixels)
> {
> struct st_context *st = st_context(ctx);
> + struct gl_renderbuffer *rb =
> + _mesa_get_read_renderbuffer_for_format(ctx, format);
> + struct st_renderbuffer *strb = st_renderbuffer(rb);
> + struct pipe_context *pipe = st->pipe;
> + struct pipe_screen *screen = pipe->screen;
> + struct pipe_resource *src;
> + struct pipe_resource *dst = NULL;
> + struct pipe_resource dst_templ;
> + enum pipe_format dst_format, src_format;
> + struct pipe_blit_info blit;
> + unsigned bind = PIPE_BIND_TRANSFER_READ;
> + struct pipe_transfer *tex_xfer;
> + ubyte *map = NULL;
>
> + /* Validate state (to be sure we have up-to-date framebuffer surfaces)
> + * and flush the bitmap cache prior to reading. */
> st_validate_state(st);
> st_flush_bitmap_cache(st);
> - _mesa_readpixels(ctx, x, y, width, height, format, type, pack, dest);
> -}
>
> + /* This must be done after state validation. */
> + src = strb->texture;
> +
> + /* XXX Fallback for depth-stencil formats due to an incomplete
> + * stencil blit implementation in some drivers. */
> + if (format == GL_DEPTH_STENCIL) {
> + goto fallback;
> + }
> +
> + /* We are creating a texture of the size of the region being read back.
> + * Need to check for NPOT texture support. */
> + if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
> + (!util_is_power_of_two(width) ||
> + !util_is_power_of_two(height))) {
> + goto fallback;
> + }
> +
> + /* If the base internal format and the texture format don't match, we have
> + * to use the slow path. */
> + if (rb->_BaseFormat !=
> + _mesa_get_format_base_format(rb->Format)) {
> + goto fallback;
> + }
> +
> + /* See if the texture format already matches the format and type,
> + * in which case the memcpy-based fast path will likely be used and
> + * we don't have to blit. */
> + if (_mesa_format_matches_format_and_type(rb->Format, format,
> + type, pack->SwapBytes)) {
> + goto fallback;
> + }
On my system (Intel core i7 and AMD 6950) the memcpy fast-path takes
around 210 milliseconds and the blit path takes around 9 milliseconds, for a
1920x1200 image. So it is much faster, at least on my system, to use the
blit path even when the mesa format matches format and type.
To test this I forced the use of the memcpy fast-path (the mesa format was
MESA_FORMAT_XRGB8888, the format was GL_BGRA and the type was
GL_UNSIGNED_BYTE). I visually inspected the resulting image and I could
not see anything wrong with it.
But perhaps forcing the use of the memcpy fast-path invalidates the results.
Or there might be some other issues that I am missing, but I just wanted to
say that on my system it is better to remove this check.
> +
> + if (_mesa_readpixels_needs_slow_path(ctx, format, type, GL_TRUE)) {
> + goto fallback;
> + }
> +
> + /* Convert the source format to what is expected by ReadPixels
> + * and see if it's supported. */
> + src_format = util_format_linear(src->format);
> + src_format = util_format_luminance_to_red(src_format);
> + src_format = util_format_intensity_to_red(src_format);
> +
> + if (!src_format ||
> + !screen->is_format_supported(screen, src_format, src->target,
> + src->nr_samples,
> + PIPE_BIND_SAMPLER_VIEW)) {
> + printf("fallback: src format unsupported %s\n", util_format_short_name(src_format));
> + goto fallback;
> + }
> +
> + if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
> + bind |= PIPE_BIND_DEPTH_STENCIL;
> + else
> + bind |= PIPE_BIND_RENDER_TARGET;
> +
> + /* Choose the destination format by finding the best match
> + * for the format+type combo. */
> + dst_format = st_choose_matching_format(screen, bind, format, type,
> + pack->SwapBytes);
> + if (dst_format == PIPE_FORMAT_NONE) {
> + printf("fallback: no matching format for %s, %s\n",
> + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
> + goto fallback;
> + }
> +
> + /* create the destination texture */
> + memset(&dst_templ, 0, sizeof(dst_templ));
> + dst_templ.target = PIPE_TEXTURE_2D;
> + dst_templ.format = dst_format;
> + dst_templ.bind = bind;
> + dst_templ.usage = PIPE_USAGE_STAGING;
> +
> + st_gl_texture_dims_to_pipe_dims(GL_TEXTURE_2D, width, height, 1,
> + &dst_templ.width0, &dst_templ.height0,
> + &dst_templ.depth0, &dst_templ.array_size);
> +
> + dst = screen->resource_create(screen, &dst_templ);
> + if (!dst) {
> + goto fallback;
> + }
> +
> + blit.src.resource = src;
> + blit.src.level = strb->rtt_level;
> + blit.src.format = src_format;
> + blit.dst.resource = dst;
> + blit.dst.level = 0;
> + blit.dst.format = dst->format;
> + blit.src.box.x = x;
> + blit.dst.box.x = 0;
> + blit.src.box.y = y;
> + blit.dst.box.y = 0;
> + blit.src.box.z = strb->rtt_face + strb->rtt_slice;
> + blit.dst.box.z = 0;
> + blit.src.box.width = blit.dst.box.width = width;
> + blit.src.box.height = blit.dst.box.height = height;
> + blit.src.box.depth = blit.dst.box.depth = 1;
> + blit.mask = st_get_blit_mask(rb->_BaseFormat, format);
> + blit.filter = PIPE_TEX_FILTER_NEAREST;
> + blit.scissor_enable = FALSE;
> +
> + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
> + blit.src.box.y = rb->Height - blit.src.box.y;
> + blit.src.box.height = -blit.src.box.height;
> + }
> +
> + /* blit */
> + st->pipe->blit(st->pipe, &blit);
> +
> + /* map resources */
> + pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
> +
> + map = pipe_transfer_map_3d(pipe, dst, 0, PIPE_TRANSFER_READ,
> + 0, 0, 0, width, height, 1, &tex_xfer);
> + if (!map) {
> + _mesa_unmap_pbo_dest(ctx, pack);
> + pipe_resource_reference(&dst, NULL);
> + goto fallback;
> + }
> +
> + /* memcpy data into a user buffer */
> + {
> + const uint bytesPerRow = width * util_format_get_blocksize(dst_format);
> + GLuint row;
> +
> + for (row = 0; row < height; row++) {
> + GLvoid *dest = _mesa_image_address3d(pack, pixels,
> + width, height, format,
> + type, 0, row, 0);
> + memcpy(dest, map, bytesPerRow);
> + map += tex_xfer->stride;
> + }
> + }
> +
> + pipe_transfer_unmap(pipe, tex_xfer);
> + _mesa_unmap_pbo_dest(ctx, pack);
> + pipe_resource_reference(&dst, NULL);
> + return;
> +
> +fallback:
> + _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
> +}
>
> void st_init_readpixels_functions(struct dd_function_table *functions)
> {
> diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
> index c922a31..7307c60 100644
> --- a/src/mesa/state_tracker/st_cb_texture.c
> +++ b/src/mesa/state_tracker/st_cb_texture.c
> @@ -68,7 +68,7 @@
> #define DBG if (0) printf
>
>
> -static enum pipe_texture_target
> +enum pipe_texture_target
> gl_target_to_pipe(GLenum target)
> {
> switch (target) {
> @@ -542,8 +542,8 @@ prep_teximage(struct gl_context *ctx, struct gl_texture_image *texImage,
> * Return a writemask for the gallium blit. The parameters can be base
> * formats or "format" from glDrawPixels/glTexImage/glGetTexImage.
> */
> -static unsigned
> -get_blit_mask(GLenum srcFormat, GLenum dstFormat)
> +unsigned
> +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat)
> {
> switch (dstFormat) {
> case GL_DEPTH_STENCIL:
> @@ -769,7 +769,7 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
> blit.src.box.width = blit.dst.box.width = width;
> blit.src.box.height = blit.dst.box.height = height;
> blit.src.box.depth = blit.dst.box.depth = depth;
> - blit.mask = get_blit_mask(format, texImage->_BaseFormat);
> + blit.mask = st_get_blit_mask(format, texImage->_BaseFormat);
> blit.filter = PIPE_TEX_FILTER_NEAREST;
> blit.scissor_enable = FALSE;
>
> @@ -996,7 +996,7 @@ st_GetTexImage(struct gl_context * ctx,
> blit.src.box.width = blit.dst.box.width = width;
> blit.src.box.height = blit.dst.box.height = height;
> blit.src.box.depth = blit.dst.box.depth = depth;
> - blit.mask = get_blit_mask(texImage->_BaseFormat, format);
> + blit.mask = st_get_blit_mask(texImage->_BaseFormat, format);
> blit.filter = PIPE_TEX_FILTER_NEAREST;
> blit.scissor_enable = FALSE;
>
> @@ -1370,7 +1370,7 @@ st_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
> blit.dst.box.width = width;
> blit.dst.box.height = height;
> blit.dst.box.depth = 1;
> - blit.mask = get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat);
> + blit.mask = st_get_blit_mask(rb->_BaseFormat, texImage->_BaseFormat);
> blit.filter = PIPE_TEX_FILTER_NEAREST;
>
> /* 1D array textures need special treatment.
> diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h
> index 27956bc..7f70d0b 100644
> --- a/src/mesa/state_tracker/st_cb_texture.h
> +++ b/src/mesa/state_tracker/st_cb_texture.h
> @@ -38,6 +38,12 @@ struct gl_texture_object;
> struct pipe_context;
> struct st_context;
>
> +extern enum pipe_texture_target
> +gl_target_to_pipe(GLenum target);
> +
> +unsigned
> +st_get_blit_mask(GLenum srcFormat, GLenum dstFormat);
> +
> extern GLboolean
> st_finalize_texture(struct gl_context *ctx,
> struct pipe_context *pipe,
> --
> 1.7.10.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list