xf86-video-intel: 4 commits - configure.ac src/intel_device.c src/intel_driver.h src/sna/gen7_render.c src/sna/sna_dri.c src/sna/sna_video_sprite.c
Chris Wilson
ickle at kemper.freedesktop.org
Fri Aug 23 11:28:12 PDT 2013
configure.ac | 11 +++++++++
src/intel_device.c | 54 +++++++++++++++++++++++++++++++++++++++------
src/intel_driver.h | 2 -
src/sna/gen7_render.c | 45 +++++++++++++++++++++++++++++++++++++
src/sna/sna_dri.c | 4 +--
src/sna/sna_video_sprite.c | 12 +++++++++-
6 files changed, 117 insertions(+), 11 deletions(-)
New commits:
commit 8b0d69e76c50155ea404f0e8a97d60a3f710c8a3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Aug 23 18:53:34 2013 +0100
intel: Add experimental rendernode support
Render nodes allow clients full access to off-screen rendering and GPU
offload, without assuming any master responsibilities (for device and
display management). As they have a more limited interface, they can be
used in a more permissive manner.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index 9fc011e..1e73c0c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -336,6 +336,17 @@ fi
xp_msg=""
+AC_ARG_ENABLE(rendernode,
+ AS_HELP_STRING([--enable-rendernode],
+ [Enable use of render nodes (experimental) [default=no]]),
+ [RENDERNODE="$enableval"],
+ [RENDERNODE=no])
+AM_CONDITIONAL(USE_RENDERNODE, test x$RENDERNODE = xyes)
+if test "x$RENDERNODE" = xyes; then
+ AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support])
+ xp_msg="$xp_msg rendernode"
+fi
+
AC_ARG_ENABLE(create2,
AS_HELP_STRING([--enable-create2],
[Enable use of create2 ioctl (experimental) [default=no]]),
diff --git a/src/intel_device.c b/src/intel_device.c
index d9ff8bc..751875e 100644
--- a/src/intel_device.c
+++ b/src/intel_device.c
@@ -24,6 +24,12 @@
**************************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
#include <assert.h>
#include <string.h>
#include <unistd.h>
@@ -43,7 +49,8 @@
#include "intel_driver.h"
struct intel_device {
- char *path;
+ char *master_node;
+ char *render_node;
int fd;
int open_count;
int master_count;
@@ -164,6 +171,32 @@ static int __intel_open_device(const struct pci_device *pci, char **path)
return fd;
}
+static char *find_render_node(int fd)
+{
+#if defined(USE_RENDERNODE)
+ struct stat master, render;
+ char buf[128];
+
+ if (fstat(fd, &master))
+ return NULL;
+
+ if (!S_ISCHR(master.st_mode))
+ return NULL;
+
+ /* Are we a render-node ourselves? */
+ if (master.st_rdev & 0x80)
+ return NULL;
+
+ sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xff));
+ if (stat(buf, &render) == 0 &&
+ master.st_mode == render.st_mode &&
+ render.st_rdev == (master.st_rdev | 0x80))
+ return strdup(buf);
+#endif
+
+ return NULL;
+}
+
int intel_open_device(int entity_num,
const struct pci_device *pci,
const char *path)
@@ -194,10 +227,13 @@ int intel_open_device(int entity_num,
if (dev == NULL)
goto err_close;
- dev->path = local_path;
dev->fd = fd;
dev->open_count = 0;
dev->master_count = 0;
+ dev->master_node = local_path;
+ dev->render_node = find_render_node(fd);
+ if (dev->render_node == NULL)
+ dev->render_node = dev->master_node;
/* If hosted under a system compositor, just pretend to be master */
if (hosted()) {
@@ -257,11 +293,11 @@ int intel_get_device(ScrnInfoPtr scrn)
return dev->fd;
}
-const char *intel_get_device_name(ScrnInfoPtr scrn)
+const char *intel_get_client_name(ScrnInfoPtr scrn)
{
struct intel_device *dev = intel_device(scrn);
- assert(dev && dev->path);
- return dev->path;
+ assert(dev && dev->render_node);
+ return dev->render_node;
}
int intel_get_master(ScrnInfoPtr scrn)
@@ -312,7 +348,9 @@ void __intel_uxa_release_device(ScrnInfoPtr scrn)
intel_set_device(scrn, NULL);
drmClose(dev->fd);
- free(dev->path);
+ if (dev->render_node != dev->master_node)
+ free(dev->render_node);
+ free(dev->master_node);
free(dev);
}
}
@@ -331,6 +369,8 @@ void intel_put_device(ScrnInfoPtr scrn)
intel_set_device(scrn, NULL);
drmClose(dev->fd);
- free(dev->path);
+ if (dev->render_node != dev->master_node)
+ free(dev->render_node);
+ free(dev->master_node);
free(dev);
}
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 4768536..e54054f 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -122,7 +122,7 @@ void intel_detect_chipset(ScrnInfoPtr scrn,
int intel_open_device(int entity_num, const struct pci_device *pci, const char *path);
int intel_get_device(ScrnInfoPtr scrn);
-const char *intel_get_device_name(ScrnInfoPtr scrn);
+const char *intel_get_client_name(ScrnInfoPtr scrn);
int intel_get_master(ScrnInfoPtr scrn);
int intel_put_master(ScrnInfoPtr scrn);
void intel_put_device(ScrnInfoPtr scrn);
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index ff96075..f31ca4e 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -2321,7 +2321,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen)
memset(&info, '\0', sizeof(info));
info.fd = sna->kgem.fd;
info.driverName = dri_driver_name(sna);
- info.deviceName = intel_get_device_name(sna->scrn);
+ info.deviceName = intel_get_client_name(sna->scrn);
DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n",
__FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName));
commit 846436c1a26b2c8a9d787ec707edb075fac57ee0
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Aug 23 16:25:08 2013 +0100
sna/dri: Make async blits async again
Fixes the regression introduced in
commit 6f5fd772c7ca656b86394a0f036d4e0cf5b33d8e [2.21.13]
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jul 25 08:29:55 2013 +0100
sna/dri: Discard the strict checking for stale bo before performing a blit
which added the sync request flag along the explicit async blit path.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 1569251..ff96075 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -1415,7 +1415,7 @@ sna_dri_immediate_blit(struct sna *sna,
ret = true;
} else {
info->bo = __sna_dri_copy_region(sna, draw, NULL,
- info->back, info->front, true);
+ info->back, info->front, false);
if (event)
DRI2SwapComplete(info->client, draw, 0, 0, 0,
DRI2_BLIT_COMPLETE,
commit fc4e81726d03399bfbbba79d2e76556836f03bd2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Aug 23 02:25:34 2013 +0100
sna/video: Disable the existing sprite when switching CRTCs
After starting a new video on another CRTC, disable the old one as we
currently only track the single video port. However, showing a video
split across multiple CRTCs would be a useful extension in the future.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index 0323e46..8b094c4 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -201,6 +201,8 @@ sna_video_sprite_show(struct sna *sna,
{
struct drm_mode_set_plane s;
+ /* XXX handle video spanning multiple CRTC */
+
VG_CLEAR(s);
s.plane_id = sna_crtc_to_plane(crtc);
@@ -300,7 +302,15 @@ sna_video_sprite_show(struct sna *sna,
}
frame->bo->domain = DOMAIN_NONE;
- video->plane = s.plane_id;
+
+ if (video->plane != s.plane_id) {
+ if (video->plane) {
+ memset(&s, 0, sizeof(s));
+ s.plane_id = video->plane;
+ drmIoctl(video->sna->kgem.fd, DRM_IOCTL_MODE_SETPLANE, &s);
+ }
+ video->plane = s.plane_id;
+ }
if (video->bo != frame->bo) {
if (video->bo)
commit 509e7aaf8446f568e133e1b450ea13f73e9b366b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Aug 23 02:10:11 2013 +0100
sna/gen7: Prefer the render ring for more operations
As we get more well-endowed GPUs with ever more execution units, it
becomes advantageous to do even basic copies through the render ring.
However, the extra performance comes at a cost - higher power usage. To
mitigate this, we apply a heuristic of only allowing a switch over to
the render ring if the render ring is already active with an earlier
request (in addition to the usual stall avoidance and general
performance heuristics).
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index cd851f0..c0ca623 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -85,6 +85,7 @@ struct gt_info {
int max_gs_entries;
int push_ps_size; /* in 1KBs */
} urb;
+ int gt;
};
static const struct gt_info ivb_gt_info = {
@@ -93,6 +94,7 @@ static const struct gt_info ivb_gt_info = {
.max_gs_threads = 16,
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 64, 64, 8 },
+ .gt = 0,
};
static const struct gt_info ivb_gt1_info = {
@@ -101,6 +103,7 @@ static const struct gt_info ivb_gt1_info = {
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
+ .gt = 1,
};
static const struct gt_info ivb_gt2_info = {
@@ -109,6 +112,7 @@ static const struct gt_info ivb_gt2_info = {
.max_gs_threads = 128,
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 256, 704, 320, 8 },
+ .gt = 2,
};
static const struct gt_info byt_gt_info = {
@@ -118,6 +122,7 @@ static const struct gt_info byt_gt_info = {
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
+ .gt = 1,
};
static const struct gt_info hsw_gt_info = {
@@ -128,6 +133,7 @@ static const struct gt_info hsw_gt_info = {
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 64, 64, 8 },
+ .gt = 0,
};
static const struct gt_info hsw_gt1_info = {
@@ -138,6 +144,7 @@ static const struct gt_info hsw_gt1_info = {
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 640, 256, 8 },
+ .gt = 1,
};
static const struct gt_info hsw_gt2_info = {
@@ -148,6 +155,7 @@ static const struct gt_info hsw_gt2_info = {
(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640, 8 },
+ .gt = 2,
};
static const struct gt_info hsw_gt3_info = {
@@ -158,6 +166,7 @@ static const struct gt_info hsw_gt3_info = {
(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 512, 3328, 1280, 16 },
+ .gt = 3,
};
inline static bool is_ivb(struct sna *sna)
@@ -2123,6 +2132,24 @@ inline static bool can_switch_to_blt(struct sna *sna,
return kgem_ring_is_idle(&sna->kgem, KGEM_BLT);
}
+inline static bool can_switch_to_render(struct sna *sna,
+ struct kgem_bo *bo)
+{
+ if (sna->kgem.ring == KGEM_RENDER)
+ return true;
+
+ if (NO_RING_SWITCH)
+ return false;
+
+ if (!sna->kgem.has_semaphores)
+ return false;
+
+ if (bo && !RQ_IS_BLT(bo->rq) && !(bo->scanout && !sna->kgem.has_wt))
+ return true;
+
+ return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER);
+}
+
static inline bool untiled_tlb_miss(struct kgem_bo *bo)
{
return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
@@ -2143,6 +2170,15 @@ inline static bool prefer_blt_ring(struct sna *sna,
return can_switch_to_blt(sna, bo, flags);
}
+inline static bool prefer_render_ring(struct sna *sna,
+ struct kgem_bo *bo)
+{
+ if (sna->render_state.gen7.info->gt < 2)
+ return false;
+
+ return can_switch_to_render(sna, bo);
+}
+
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
@@ -2392,6 +2428,9 @@ prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
kgem_bo_is_render(tmp->src.bo))
return false;
+ if (prefer_render_ring(sna, tmp->dst.bo))
+ return false;
+
if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
@@ -2833,6 +2872,9 @@ static inline bool prefer_blt_copy(struct sna *sna,
kgem_bo_is_render(src_bo))
return false;
+ if (prefer_render_ring(sna, dst_bo))
+ return false;
+
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
@@ -3238,6 +3280,9 @@ static inline bool prefer_blt_fill(struct sna *sna,
if (untiled_tlb_miss(bo))
return true;
+ if (prefer_render_ring(sna, bo))
+ return false;
+
if (!prefer_blt_ring(sna, bo, 0))
return false;
More information about the xorg-commit
mailing list