xf86-video-intel: 3 commits - src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.h src/sna/sna_io.c src/sna/sna_render.c src/sna/sna_render.h src/sna/sna_video.c
Chris Wilson
ickle at kemper.freedesktop.org
Thu Jul 19 09:56:49 PDT 2012
src/sna/gen6_render.c | 43 +++++++++++++++++++++++++++++++++++++++++--
src/sna/gen7_render.c | 2 +-
src/sna/kgem.h | 14 +++++++++++---
src/sna/sna_io.c | 8 ++++----
src/sna/sna_render.c | 2 +-
src/sna/sna_render.h | 1 +
src/sna/sna_video.c | 2 +-
7 files changed, 60 insertions(+), 12 deletions(-)
New commits:
commit 6f60f89588caa70e7d8ed53ba453bbe8c2094a95
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jul 19 16:58:34 2012 +0100
sna/gen6: Bump the WM thread count to 80
Note that we should only do this when "WiZ Hashing" is disabled. So we
should be checking the GT_MODE register (bring on i915_read!) to be sure
that it is safe to do so. However, it gives a big boost to performance of
render copies... It also causes perf benchmarks to hit thermal limits
much quicker.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 20a09d0..044a9f9 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -56,6 +56,31 @@
#define GEN6_MAX_SIZE 8192
+struct gt_info {
+ int max_vs_threads;
+ int max_gs_threads;
+ int max_wm_threads;
+ struct {
+ int size;
+ int max_vs_entries;
+ int max_gs_entries;
+ } urb;
+};
+
+static const struct gt_info gt1_info = {
+ .max_vs_threads = 24,
+ .max_gs_threads = 21,
+ .max_wm_threads = 40,
+ .urb = { 32, 256, 256 },
+};
+
+static const struct gt_info gt2_info = {
+ .max_vs_threads = 60,
+ .max_gs_threads = 60,
+ .max_wm_threads = 80,
+ .urb = { 64, 256, 256 },
+};
+
static const uint32_t ps_kernel_nomask_affine[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
@@ -422,7 +447,7 @@ gen6_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
- (256 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+ (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
@@ -665,7 +690,7 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel)
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0);
OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */
- OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
+ OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
GEN6_3DSTATE_WM_DISPATCH_ENABLE |
GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
@@ -4198,6 +4223,10 @@ static bool gen6_render_setup(struct sna *sna)
struct gen6_sampler_state *ss;
int i, j, k, l, m;
+ state->info = &gt1_info;
+ if (DEVICE_ID(sna->PciInfo) & 0x20)
+ state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
+
sna_static_stream_init(&general);
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 18ba826..c041d66 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -4268,7 +4268,7 @@ static bool gen7_render_setup(struct sna *sna)
state->info = &gt1_info;
if (DEVICE_ID(sna->PciInfo) & 0x20)
- state->info = &gt2_info;
+ state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
sna_static_stream_init(&general);
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index f0f4a2d..5662a79 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -388,6 +388,7 @@ enum {
};
struct gen6_render_state {
+ const struct gt_info *info;
struct kgem_bo *general_bo;
uint32_t vs_state;
commit fc39d4b5cb105d269c5349e479daf112f5d93580
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jul 19 16:25:54 2012 +0100
sna/gen6: Add a simple DBG option to limit usage of either BLT/RENDER
We can force the code to either select only BLT or RENDER operations -
for those that we have a choice for at least!
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 389d002..20a09d0 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -52,6 +52,7 @@
#define NO_FILL_CLEAR 0
#define NO_RING_SWITCH 0
+#define PREFER_RENDER 0
#define GEN6_MAX_SIZE 8192
@@ -2344,6 +2345,9 @@ gen6_composite_set_target(struct sna *sna,
static bool prefer_blt_ring(struct sna *sna)
{
+ if (PREFER_RENDER)
+ return PREFER_RENDER < 0;
+
return sna->kgem.ring != KGEM_RENDER;
}
@@ -3272,6 +3276,9 @@ static inline bool prefer_blt_copy(struct sna *sna,
PixmapPtr dst, struct kgem_bo *dst_bo,
unsigned flags)
{
+ if (PREFER_RENDER)
+ return PREFER_RENDER > 0;
+
return (sna->kgem.ring == KGEM_BLT ||
(flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) ||
prefer_blt_bo(sna, src, src_bo) ||
@@ -3647,6 +3654,9 @@ gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo)
{
+ if (PREFER_RENDER)
+ return PREFER_RENDER < 0;
+
return (can_switch_rings(sna) ||
prefer_blt_ring(sna) ||
untiled_tlb_miss(bo));
commit 15d3eea7004822e5cbd48d676692e1b6a2b26d3e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jul 19 16:22:20 2012 +0100
sna: Handle mixed bo/buffers in assertions
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index b038bb1..f7ee5b4 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -420,16 +420,24 @@ int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo);
void kgem_get_tile_size(struct kgem *kgem, int tiling,
int *tile_width, int *tile_height, int *tile_size);
+static inline int __kgem_buffer_size(struct kgem_bo *bo)
+{
+ assert(bo->proxy && bo->io);
+ return bo->size.bytes;
+}
+
static inline int kgem_bo_size(struct kgem_bo *bo)
{
assert(!(bo->proxy && bo->io));
return PAGE_SIZE * bo->size.pages.count;
}
-static inline int kgem_buffer_size(struct kgem_bo *bo)
+static inline int __kgem_bo_size(struct kgem_bo *bo)
{
- assert(bo->proxy && bo->io);
- return bo->size.bytes;
+ if (bo->io)
+ return __kgem_buffer_size(bo);
+ else
+ return kgem_bo_size(bo);
}
static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index f1df84a..b53143f 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -438,7 +438,7 @@ fallback:
_kgem_set_mode(kgem, KGEM_BLT);
tmp_box += nbox_this_time;
} while (1);
- assert(offset == kgem_buffer_size(dst_bo));
+ assert(offset == __kgem_buffer_size(dst_bo));
kgem_buffer_read_sync(kgem, dst_bo);
@@ -470,7 +470,7 @@ fallback:
src += pitch * height;
} while (--nbox);
- assert(src - (char *)ptr == kgem_buffer_size(dst_bo));
+ assert(src - (char *)ptr == __kgem_buffer_size(dst_bo));
kgem_bo_destroy(kgem, dst_bo);
sna->blt_state.fill_bo = 0;
}
@@ -841,7 +841,7 @@ tile:
box++;
offset += pitch * height;
} while (--nbox_this_time);
- assert(offset == kgem_buffer_size(src_bo));
+ assert(offset == __kgem_buffer_size(src_bo));
if (nbox) {
_kgem_submit(kgem);
@@ -1079,7 +1079,7 @@ fallback:
box++;
offset += pitch * height;
} while (--nbox_this_time);
- assert(offset == kgem_buffer_size(src_bo));
+ assert(offset == __kgem_buffer_size(src_bo));
if (nbox) {
_kgem_submit(kgem);
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index fd105f4..e503586 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1604,7 +1604,7 @@ do_fixup:
w, h);
pixman_image_unref(src);
} else {
- memset(ptr, 0, kgem_buffer_size(channel->bo));
+ memset(ptr, 0, __kgem_buffer_size(channel->bo));
dst = src;
}
}
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index b76a3c4..b8690ec 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -102,7 +102,7 @@ sna_video_buffer(struct sna *sna,
struct sna_video_frame *frame)
{
/* Free the current buffer if we're going to have to reallocate */
- if (video->buf && kgem_bo_size(video->buf) < frame->size)
+ if (video->buf && __kgem_bo_size(video->buf) < frame->size)
sna_video_free_buffers(sna, video);
if (video->buf == NULL)
More information about the xorg-commit
mailing list