[Intel-gfx] [PATCH 4/6] Xv: setup pipeline for Xv on Sandybridge
Xiang, Haihao
haihao.xiang at intel.com
Thu Oct 21 10:55:44 CEST 2010
Signed-off-by: Xiang, Haihao <haihao.xiang at intel.com>
---
src/brw_structs.h | 100 ++++++++
src/i965_reg.h | 98 ++++++++
src/i965_video.c | 624 +++++++++++++++++++++++++++++++++++++++++++++++
src/intel.h | 4 +
src/intel_batchbuffer.c | 25 ++-
src/intel_video.h | 7 +
6 files changed, 852 insertions(+), 6 deletions(-)
diff --git a/src/brw_structs.h b/src/brw_structs.h
index 1cee5bd..d089ba1 100644
--- a/src/brw_structs.h
+++ b/src/brw_structs.h
@@ -1487,4 +1487,104 @@ struct brw_interface_descriptor {
} desc3;
};
+/*
+ * GEN6 blend state as two sub-structs, blend0 and blend1; the bitfield
+ * widths in each add up to 32.  The Xv code in this patch only writes
+ * blend1.logic_op_enable and blend1.logic_op_func.
+ * NOTE(review): the field order presumably mirrors the hardware dwords
+ * LSB-first, which depends on the compiler's bitfield allocation -- confirm.
+ */
+struct gen6_blend_state
+{
+ struct {
+ unsigned int dest_blend_factor:5;
+ unsigned int source_blend_factor:5;
+ unsigned int pad3:1;
+ unsigned int blend_func:3;
+ unsigned int pad2:1;
+ /* "ia_" fields: presumably the independent-alpha variants -- TODO confirm */
+ unsigned int ia_dest_blend_factor:5;
+ unsigned int ia_source_blend_factor:5;
+ unsigned int pad1:1;
+ unsigned int ia_blend_func:3;
+ unsigned int pad0:1;
+ unsigned int ia_blend_enable:1;
+ unsigned int blend_enable:1;
+ } blend0;
+
+ /* clamp, dither, alpha test, logic op, per-channel write disables, alpha-to-coverage */
+ struct {
+ unsigned int post_blend_clamp_enable:1;
+ unsigned int pre_blend_clamp_enable:1;
+ unsigned int clamp_range:2;
+ unsigned int pad0:4;
+ unsigned int x_dither_offset:2;
+ unsigned int y_dither_offset:2;
+ unsigned int dither_enable:1;
+ unsigned int alpha_test_func:3;
+ unsigned int alpha_test_enable:1;
+ unsigned int pad1:1;
+ unsigned int logic_op_func:4;
+ unsigned int logic_op_enable:1;
+ unsigned int pad2:1;
+ unsigned int write_disable_b:1;
+ unsigned int write_disable_g:1;
+ unsigned int write_disable_r:1;
+ unsigned int write_disable_a:1;
+ unsigned int pad3:1;
+ unsigned int alpha_to_coverage_dither:1;
+ unsigned int alpha_to_one:1;
+ unsigned int alpha_to_coverage:1;
+ } blend1;
+};
+
+/*
+ * GEN6 color calc state: one 32-bit word of bitfields (cc0), one word that
+ * holds the alpha reference either as a float or as an 8-bit integer (cc1),
+ * followed by four float constant-color channels.
+ * gen6_create_cc_state() only fills in the constant_* members.
+ */
+struct gen6_color_calc_state
+{
+ struct {
+ unsigned int alpha_test_format:1;
+ unsigned int pad0:14;
+ unsigned int round_disable:1;
+ unsigned int bf_stencil_ref:8;
+ unsigned int stencil_ref:8;
+ } cc0;
+
+ /* Two views of the same 32 bits; presumably selected by cc0.alpha_test_format -- TODO confirm */
+ union {
+ float alpha_ref_f;
+ struct {
+ unsigned int ui:8;
+ unsigned int pad0:24;
+ } alpha_ref_fi;
+ } cc1;
+
+ /* constant color, one float per channel */
+ float constant_r;
+ float constant_g;
+ float constant_b;
+ float constant_a;
+};
+
+/*
+ * GEN6 depth/stencil state as three sub-structs (ds0, ds1, ds2) whose
+ * bitfield widths each add up to 32.  "bf_" members are the back-face
+ * counterparts of the stencil members next to them (by naming; confirm
+ * against the hardware documentation).
+ * gen6_create_depth_stencil_state() does not set any of these fields.
+ */
+struct gen6_depth_stencil_state
+{
+ /* stencil operations, functions and enables */
+ struct {
+ unsigned int pad0:3;
+ unsigned int bf_stencil_pass_depth_pass_op:3;
+ unsigned int bf_stencil_pass_depth_fail_op:3;
+ unsigned int bf_stencil_fail_op:3;
+ unsigned int bf_stencil_func:3;
+ unsigned int bf_stencil_enable:1;
+ unsigned int pad1:2;
+ unsigned int stencil_write_enable:1;
+ unsigned int stencil_pass_depth_pass_op:3;
+ unsigned int stencil_pass_depth_fail_op:3;
+ unsigned int stencil_fail_op:3;
+ unsigned int stencil_func:3;
+ unsigned int stencil_enable:1;
+ } ds0;
+
+ /* four 8-bit stencil masks */
+ struct {
+ unsigned int bf_stencil_write_mask:8;
+ unsigned int bf_stencil_test_mask:8;
+ unsigned int stencil_write_mask:8;
+ unsigned int stencil_test_mask:8;
+ } ds1;
+
+ /* depth test/write controls occupy the top six bits; the low 26 are padding */
+ struct {
+ unsigned int pad0:26;
+ unsigned int depth_write_enable:1;
+ unsigned int depth_test_func:3;
+ unsigned int pad1:1;
+ unsigned int depth_test_enable:1;
+ } ds2;
+};
+
#endif
diff --git a/src/i965_reg.h b/src/i965_reg.h
index fe419dc..3953dab 100644
--- a/src/i965_reg.h
+++ b/src/i965_reg.h
@@ -22,6 +22,10 @@
#define BRW_3DSTATE_PIPELINED_POINTERS BRW_3D(3, 0, 0)
#define BRW_3DSTATE_BINDING_TABLE_POINTERS BRW_3D(3, 0, 1)
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
+# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
+
#define BRW_3DSTATE_VERTEX_BUFFERS BRW_3D(3, 0, 8)
#define BRW_3DSTATE_VERTEX_ELEMENTS BRW_3D(3, 0, 9)
#define BRW_3DSTATE_INDEX_BUFFER BRW_3D(3, 0, 0xa)
@@ -32,6 +36,9 @@
#define BRW_3DSTATE_SAMPLER_PALETTE_LOAD BRW_3D(3, 1, 2)
#define BRW_3DSTATE_CHROMA_KEY BRW_3D(3, 1, 4)
#define BRW_3DSTATE_DEPTH_BUFFER BRW_3D(3, 1, 5)
+# define BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
+# define BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
+
#define BRW_3DSTATE_POLY_STIPPLE_OFFSET BRW_3D(3, 1, 6)
#define BRW_3DSTATE_POLY_STIPPLE_PATTERN BRW_3D(3, 1, 7)
#define BRW_3DSTATE_LINE_STIPPLE BRW_3D(3, 1, 8)
@@ -44,6 +51,91 @@
#define BRW_3DPRIMITIVE BRW_3D(3, 3, 0)
+#define BRW_3DSTATE_CLEAR_PARAMS BRW_3D(3, 1, 0x10)
+/* DW1 */
+# define BRW_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS BRW_3D(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
+
+#define GEN6_3DSTATE_URB BRW_3D(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS BRW_3D(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS BRW_3D(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS BRW_3D(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS BRW_3D(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
+
+#define GEN6_3DSTATE_CLIP BRW_3D(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF BRW_3D(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
+
+
+/* 3DSTATE_WM opcode; each "DWn" comment names the command dword the following fields belong to. */
+#define GEN6_3DSTATE_WM BRW_3D(3, 0, 0x14)
+/* DW2 */
+/* NOTE: "SHITF" is a misspelling of SHIFT, but gen6_upload_wm_state() references this exact name. */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
+/* barycentric interpolation mode bits, one per mode */
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
+
+
+
+
+#define GEN6_3DSTATE_CONSTANT_VS BRW_3D(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS BRW_3D(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS BRW_3D(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK BRW_3D(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE BRW_3D(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
+
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
@@ -80,16 +172,22 @@
#define BRW_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define BRW_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define BRW_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
+#define BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
+#define GEN6_VB0_VERTEXDATA (0 << 20)
+#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
+#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
diff --git a/src/i965_video.c b/src/i965_video.c
index aaf10fa..6acac36 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -134,6 +134,21 @@ static const uint32_t ps_kernel_planar_static_gen5[][4] = {
#include "exa_wm_write.g4b.gen5"
};
+/*
+ * Pixel shader programs for Sandybridge.  Each array is the concatenation of
+ * the rows contributed by the included .g6b files, in the order listed.
+ * Packed variant: uploaded as wm_prog_packed_bo by gen6_create_vidoe_objects().
+ */
+static const uint32_t ps_kernel_packed_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_argb.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
+/*
+ * Planar variant: differs from the packed program only in its sampling stage
+ * (exa_wm_src_sample_planar); uploaded as wm_prog_planar_bo.
+ */
+static const uint32_t ps_kernel_planar_static_gen6[][4] = {
+#include "exa_wm_src_affine.g6b"
+#include "exa_wm_src_sample_planar.g6b"
+#include "exa_wm_yuv_rgb.g6b"
+#include "exa_wm_write.g6b"
+};
+
static uint32_t float_to_uint(float f)
{
union {
@@ -1256,4 +1271,613 @@ void i965_free_video(ScrnInfoPtr scrn)
intel->video.gen4_sampler_bo = NULL;
drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo);
intel->video.gen4_sip_kernel_bo = NULL;
+ drm_intel_bo_unreference(intel->video.wm_prog_packed_bo);
+ intel->video.wm_prog_packed_bo = NULL;
+ drm_intel_bo_unreference(intel->video.wm_prog_planar_bo);
+ intel->video.wm_prog_planar_bo = NULL;
+ drm_intel_bo_unreference(intel->video.gen6_blend_bo);
+ intel->video.gen6_blend_bo = NULL;
+ drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo);
+ intel->video.gen6_depth_stencil_bo = NULL;
+}
+
+/* for GEN6+ */
+/*
+ * Allocate the color calc state buffer object used by textured video and
+ * set its constant color to r=1, g=0, b=1, a=1.
+ *
+ * Returns the new bo (caller owns the reference), or NULL when
+ * intel_alloc_and_map() fails.
+ */
+static drm_intel_bo *
+gen6_create_cc_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_color_calc_state *cc_state;
+	drm_intel_bo *cc_bo;
+
+	if (intel_alloc_and_map(intel, "textured video cc state",
+				sizeof(*cc_state), &cc_bo, &cc_state) != 0)
+		return NULL;
+
+	cc_state->constant_r = 1.0;
+	cc_state->constant_g = 0.0;
+	cc_state->constant_b = 1.0;
+	cc_state->constant_a = 1.0;
+
+	/*
+	 * intel_alloc_and_map() hands back a CPU mapping (we just wrote
+	 * through it).  Release it once the state is filled in, as the
+	 * vertex buffer path in Gen6DisplayVideoTextured() does; otherwise
+	 * this long-lived bo stays mapped for its whole lifetime.
+	 */
+	drm_intel_bo_unmap(cc_bo);
+
+	return cc_bo;
+}
+
+/*
+ * Allocate the blend state buffer object used by textured video.  The only
+ * fields written are the logic op enable and logic op function 0xc
+ * (presumably the COPY logic op -- confirm against the hardware docs).
+ *
+ * Returns the new bo (caller owns the reference), or NULL when
+ * intel_alloc_and_map() fails.
+ */
+static drm_intel_bo *
+gen6_create_blend_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_blend_state *blend_state;
+	drm_intel_bo *blend_bo;
+
+	if (intel_alloc_and_map(intel, "textured video blend state",
+				sizeof(*blend_state), &blend_bo,
+				&blend_state) != 0)
+		return NULL;
+
+	blend_state->blend1.logic_op_enable = 1;
+	blend_state->blend1.logic_op_func = 0xc;
+
+	/*
+	 * Release the CPU mapping obtained from intel_alloc_and_map() now
+	 * that the state is written, matching the map/unmap pairing used
+	 * for the vertex buffer in Gen6DisplayVideoTextured().
+	 */
+	drm_intel_bo_unmap(blend_bo);
+
+	return blend_bo;
+}
+
+/*
+ * Allocate the depth/stencil state buffer object used by textured video.
+ * No field is set here, so the state is whatever intel_alloc_and_map()
+ * leaves in the buffer (presumably all zero, i.e. depth and stencil
+ * disabled -- confirm against intel_alloc_and_map()).
+ *
+ * Returns the new bo (caller owns the reference), or NULL on failure.
+ */
+static drm_intel_bo *
+gen6_create_depth_stencil_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct gen6_depth_stencil_state *depth_stencil_state;
+	drm_intel_bo *depth_stencil_bo;
+
+	/* Use a name of its own so this bo is not confused with the blend state bo. */
+	if (intel_alloc_and_map(intel, "textured video depth stencil state",
+				sizeof(*depth_stencil_state),
+				&depth_stencil_bo,
+				&depth_stencil_state) != 0)
+		return NULL;
+
+	/*
+	 * Nothing to write; just drop the CPU mapping obtained from
+	 * intel_alloc_and_map() so the bo is not left mapped.
+	 */
+	drm_intel_bo_unmap(depth_stencil_bo);
+
+	return depth_stencil_bo;
+}
+
+/*
+ * Lazily create every buffer object the GEN6 textured video path needs:
+ * sampler state, both pixel shader programs, CC viewport, color calc,
+ * blend and depth/stencil state.  Objects that already exist are kept, and
+ * every missing one is attempted even if an earlier creation failed.
+ *
+ * Returns TRUE only when all seven objects are present afterwards.
+ */
+static Bool
+gen6_create_vidoe_objects(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	if (!intel->video.gen4_sampler_bo)
+		intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn);
+
+	if (!intel->video.wm_prog_packed_bo) {
+		intel->video.wm_prog_packed_bo =
+			i965_create_program(scrn,
+					    &ps_kernel_packed_static_gen6[0][0],
+					    sizeof(ps_kernel_packed_static_gen6));
+	}
+
+	if (!intel->video.wm_prog_planar_bo) {
+		intel->video.wm_prog_planar_bo =
+			i965_create_program(scrn,
+					    &ps_kernel_planar_static_gen6[0][0],
+					    sizeof(ps_kernel_planar_static_gen6));
+	}
+
+	if (!intel->video.gen4_cc_vp_bo)
+		intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn);
+
+	if (!intel->video.gen4_cc_bo)
+		intel->video.gen4_cc_bo = gen6_create_cc_state(scrn);
+
+	if (!intel->video.gen6_blend_bo)
+		intel->video.gen6_blend_bo = gen6_create_blend_state(scrn);
+
+	if (!intel->video.gen6_depth_stencil_bo)
+		intel->video.gen6_depth_stencil_bo =
+			gen6_create_depth_stencil_state(scrn);
+
+	/* Any missing object means the pipeline cannot be set up. */
+	if (!intel->video.gen4_sampler_bo ||
+	    !intel->video.wm_prog_packed_bo ||
+	    !intel->video.wm_prog_planar_bo ||
+	    !intel->video.gen4_cc_vp_bo ||
+	    !intel->video.gen4_cc_bo ||
+	    !intel->video.gen6_blend_bo ||
+	    !intel->video.gen6_depth_stencil_bo)
+		return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * Emit the fixed leading state for GEN6 video: a flushing PIPE_CONTROL,
+ * 3D pipeline select, one-sample multisample setup, a sample mask of 1 and
+ * a zero system instruction pointer.  12 dwords in total.
+ */
+static void
+gen6_upload_invarient_states(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t flush_bits = BRW_PIPE_CONTROL_IS_FLUSH |
+				    BRW_PIPE_CONTROL_WC_FLUSH |
+				    BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+				    BRW_PIPE_CONTROL_NOWRITE;
+	/* 1 sample/pixel */
+	const uint32_t one_sample =
+		GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1;
+
+	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(flush_bits);
+	OUT_BATCH(0);	/* write address */
+	OUT_BATCH(0);	/* write data */
+
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
+	OUT_BATCH(one_sample);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+	OUT_BATCH(1);
+
+	/* Set system instruction pointer */
+	OUT_BATCH(BRW_STATE_SIP | 0);
+	OUT_BATCH(0);
+}
+
+/*
+ * Emit STATE_BASE_ADDRESS (10 dwords).  Only the surface state base carries
+ * a relocation, to the surface state + binding table bo; every other base
+ * and upper bound is emitted as a bare BASE_ADDRESS_MODIFY.
+ */
+static void
+gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int dw;
+
+	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
+	OUT_BATCH(BASE_ADDRESS_MODIFY);	/* General state base address */
+	OUT_RELOC(surface_state_binding_table_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0,
+		  BASE_ADDRESS_MODIFY);	/* Surface state base address */
+
+	/*
+	 * Remaining seven dwords, in order: dynamic state, indirect object
+	 * and instruction base addresses, then the general state, dynamic
+	 * state, indirect object and instruction access upper bounds.
+	 */
+	for (dw = 0; dw < 7; dw++) {
+		OUT_BATCH(BASE_ADDRESS_MODIFY);
+	}
+}
+
+/*
+ * Emit 3DSTATE_VIEWPORT_STATE_POINTERS with only the MODIFY_CC bit set: the
+ * first two pointer dwords are zero and the last one relocates to the CC
+ * viewport bo.
+ */
+static void
+gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t cmd = GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | (4 - 2);
+
+	OUT_BATCH(cmd | GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_RELOC(intel->video.gen4_cc_vp_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+/*
+ * Emit 3DSTATE_URB: 24 VS entries with a size field of (1 - 1), and a zero
+ * GS dword since no GS thread is used.
+ */
+static void
+gen6_upload_urb(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	/* at least 24 on GEN6 */
+	OUT_BATCH((24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT) |
+		  ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT));
+	/* no GS thread: both the GS size and GS entries fields are 0 */
+	OUT_BATCH(0);
+}
+
+/*
+ * Emit 3DSTATE_CC_STATE_POINTERS: relocations to the blend, depth/stencil
+ * and color calc state bos, in that order, each with a delta of 1
+ * (presumably the per-pointer "modify" bit -- confirm against the PRM).
+ */
+static void
+gen6_upload_cc_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	drm_intel_bo *const state_bos[] = {
+		intel->video.gen6_blend_bo,
+		intel->video.gen6_depth_stencil_bo,
+		intel->video.gen4_cc_bo,
+	};
+	unsigned int n;
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+	for (n = 0; n < sizeof(state_bos) / sizeof(state_bos[0]); n++) {
+		OUT_RELOC(state_bos[n], I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	}
+}
+
+/*
+ * Emit 3DSTATE_SAMPLER_STATE_POINTERS with only the PS pointer modified;
+ * it relocates to the shared sampler state bo.
+ */
+static void
+gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t cmd = GEN6_3DSTATE_SAMPLER_STATE_POINTERS | (4 - 2);
+
+	OUT_BATCH(cmd | GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS);
+	OUT_BATCH(0);	/* VS */
+	OUT_BATCH(0);	/* GS */
+	OUT_RELOC(intel->video.gen4_sampler_bo,
+		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+/*
+ * Emit the binding table pointers.  Only the PS uses a binding table, so
+ * the VS and GS entries are zero and just the PS pointer is modified.
+ *
+ * ps_binding_table_offset: value emitted as the PS binding table pointer.
+ */
+static void
+gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t cmd = BRW_3DSTATE_BINDING_TABLE_POINTERS | (4 - 2);
+
+	OUT_BATCH(cmd | GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS);
+	OUT_BATCH(0);	/* vs */
+	OUT_BATCH(0);	/* gs */
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+/*
+ * Emit a null depth buffer (surface type BRW_SURFACE_NULL, D32_FLOAT format
+ * field, all remaining dwords zero) followed by zeroed clear params.
+ */
+static void
+gen6_upload_depth_buffer_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t type_and_format =
+		(BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+		(BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
+	int dw;
+
+	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH(type_and_format);
+	/* the other five dwords of the command are zero */
+	for (dw = 0; dw < 5; dw++) {
+		OUT_BATCH(0);
+	}
+
+	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+/*
+ * Emit a drawing rectangle covering (0, 0) to (virtualX - 1, virtualY - 1)
+ * with a zero origin.
+ */
+static void
+gen6_upload_drawing_rectangle(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const int x_max = scrn->virtualX - 1;
+	const int y_max = scrn->virtualY - 1;
+
+	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
+	OUT_BATCH(0x00000000);		/* ymin, xmin */
+	OUT_BATCH(x_max | y_max << 16);	/* ymax, xmax */
+	OUT_BATCH(0x00000000);		/* yorigin, xorigin */
+}
+
+/*
+ * Emit an all-zero 3DSTATE_CONSTANT_VS (constant buffer disabled) and an
+ * all-zero 3DSTATE_VS (no VS kernel, pass-through).
+ */
+static void
+gen6_upload_vs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int dw;
+
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+	for (dw = 0; dw < 4; dw++) {
+		OUT_BATCH(0);
+	}
+
+	/* without VS kernel: all five payload dwords are 0 (pass-through) */
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	for (dw = 0; dw < 5; dw++) {
+		OUT_BATCH(0);
+	}
+}
+
+/*
+ * Emit an all-zero 3DSTATE_CONSTANT_GS (constant buffer disabled) and an
+ * all-zero 3DSTATE_GS (no GS kernel, pass-through).
+ */
+static void
+gen6_upload_gs_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int dw;
+
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	for (dw = 0; dw < 4; dw++) {
+		OUT_BATCH(0);
+	}
+
+	/* without GS kernel: all six payload dwords are 0 (pass-through) */
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	for (dw = 0; dw < 6; dw++) {
+		OUT_BATCH(0);
+	}
+}
+
+/* Emit 3DSTATE_CLIP with every payload dword zero (pass-through). */
+static void
+gen6_upload_clip_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	int dw;
+
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	for (dw = 0; dw < 3; dw++) {
+		OUT_BATCH(0);
+	}
+}
+
+/*
+ * Emit the 20-dword 3DSTATE_SF: one output, URB entry read length 1 at
+ * read offset 0, no culling, trifan provoking vertex field set to 2, and
+ * DW5 through DW19 zero.
+ */
+static void
+gen6_upload_sf_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	const uint32_t dw1 =
+		(1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+	int dw;
+
+	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+	OUT_BATCH(dw1);
+	OUT_BATCH(0);						/* DW2 */
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);			/* DW3 */
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);	/* DW4 */
+
+	/* DW5 .. DW19 */
+	for (dw = 5; dw <= 19; dw++) {
+		OUT_BATCH(0);
+	}
+}
+
+/*
+ * Emit a zeroed 3DSTATE_CONSTANT_PS followed by 3DSTATE_WM.
+ *
+ * is_packed: non-zero selects the packed-format kernel with a binding
+ *            table entry count of 2; zero selects the planar kernel with
+ *            a count of 7.  Everything else is identical for both.
+ */
+static void
+gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	drm_intel_bo *kernel_bo;
+	int binding_entries;
+	int dw;
+
+	if (is_packed) {
+		kernel_bo = intel->video.wm_prog_packed_bo;
+		binding_entries = 2;
+	} else {
+		kernel_bo = intel->video.wm_prog_planar_bo;
+		binding_entries = 7;
+	}
+
+	/* disable WM constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	for (dw = 0; dw < 4; dw++) {
+		OUT_BATCH(0);
+	}
+
+	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
+	OUT_RELOC(kernel_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+	OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
+		  (binding_entries << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	OUT_BATCH(0);
+	/* DW4 */
+	OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT));
+	/* DW5: max threads field (40 - 1), dispatch enabled, 16-wide dispatch */
+	OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
+		  GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+	/* DW6: one SF output, perspective pixel barycentric */
+	OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+/*
+ * Emit two vertex elements sourced from vertex buffer 0.  Each reads an
+ * R32G32_FLOAT pair, at byte offset 0 and byte offset 8 respectively, and
+ * expands it to {src0, src1, 1.0, 1.0}.
+ *
+ * The original labels are X,Y for offset 0 and S0,T0 for offset 8, but
+ * Gen6DisplayVideoTextured() stores the scaled source coordinates in
+ * floats 0-1 of each vertex and the destination x,y in floats 2-3.
+ * NOTE(review): confirm which assignment is intended.
+ */
+static void
+gen6_upload_vertex_element_state(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	static const int byte_offset[2] = { 0, 8 };
+	/* component control shared by both elements */
+	const uint32_t ve1 =
+		(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+		(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+	int elem;
+
+	/* Set up our vertex elements, sourced from the single vertex buffer. */
+	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2));
+	for (elem = 0; elem < 2; elem++) {
+		OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+			  GEN6_VE0_VALID |
+			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+			  (byte_offset[elem] << VE0_OFFSET_SHIFT));
+		OUT_BATCH(ve1);
+	}
+}
+
+/*
+ * Emit the complete GEN6 pipeline setup for one textured-video draw.
+ *
+ * surface_state_binding_table_bo: bo holding the surface states followed by
+ *                                 the binding table.
+ * n_src_surf: number of source surfaces; must be 1 (selects the packed
+ *             kernel) or 6 (selects the planar kernel).
+ *
+ * The PS binding table offset is (n_src_surf + 1) surface-state slots of
+ * ALIGN(sizeof(struct brw_surface_state), 32) bytes each.
+ */
+static void
+gen6_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo, int n_src_surf)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	Bool is_packed;
+	uint32_t ps_binding_table_offset;
+
+	assert(n_src_surf == 1 || n_src_surf == 6);
+	IntelEmitInvarientState(scrn);
+	intel->last_3d = LAST_3D_VIDEO;
+
+	is_packed = (n_src_surf == 1) ? TRUE : FALSE;
+	ps_binding_table_offset =
+		(n_src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32);
+
+	gen6_upload_invarient_states(scrn);
+	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
+	gen6_upload_viewport_state_pointers(scrn);
+	gen6_upload_urb(scrn);
+	gen6_upload_cc_state_pointers(scrn);
+	gen6_upload_sampler_state_pointers(scrn);
+	gen6_upload_vs_state(scrn);
+	gen6_upload_gs_state(scrn);
+	gen6_upload_clip_state(scrn);
+	gen6_upload_sf_state(scrn);
+	gen6_upload_wm_state(scrn, is_packed);
+	gen6_upload_binding_table(scrn, ps_binding_table_offset);
+	gen6_upload_depth_buffer_state(scrn);
+	gen6_upload_drawing_rectangle(scrn);
+	gen6_upload_vertex_element_state(scrn);
+}
+
+/*
+ * Draw one video frame through the GEN6 3D pipeline, one rectangle per box
+ * of dstRegion, into pixmap.
+ *
+ * adaptor_priv supplies the source buffer (buf) and the Y/U/V plane offsets;
+ * id is the FOURCC and decides between the planar path (six R8 source
+ * surfaces) and the packed path (one YCrCb source surface).  width/height
+ * describe the source image, video_pitch/video_pitch2 its pitches, and
+ * src_w/src_h over drw_w/drw_h give the scaling applied to the texture
+ * coordinates.
+ *
+ * Returns nothing; on any allocation failure it stops silently (drawing
+ * nothing further) after releasing what it allocated.
+ */
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+ intel_adaptor_private *adaptor_priv, int id,
+ RegionPtr dstRegion,
+ short width, short height,
+ int video_pitch, int video_pitch2,
+ short src_w, short src_h,
+ short drw_w, short drw_h, PixmapPtr pixmap)
+{
+ intel_screen_private *intel = intel_get_screen_private(scrn);
+ BoxPtr pbox;
+ int nbox, dxo, dyo, pix_xoff, pix_yoff;
+ float src_scale_x, src_scale_y;
+ int src_surf;
+ int n_src_surf;
+ uint32_t src_surf_format;
+ uint32_t src_surf_base[6];
+ int src_width[6];
+ int src_height[6];
+ int src_pitch[6];
+ drm_intel_bo *surface_state_binding_table_bo;
+
+ /* Each plane offset is listed twice: Y, Y, V, V, U, U (the reason for the duplication is not visible here). */
+ src_surf_base[0] = adaptor_priv->YBufOffset;
+ src_surf_base[1] = adaptor_priv->YBufOffset;
+ src_surf_base[2] = adaptor_priv->VBufOffset;
+ src_surf_base[3] = adaptor_priv->VBufOffset;
+ src_surf_base[4] = adaptor_priv->UBufOffset;
+ src_surf_base[5] = adaptor_priv->UBufOffset;
+
+ if (is_planar_fourcc(id)) {
+ /* Planar: full-size Y surfaces use video_pitch2; half-size chroma surfaces use video_pitch. */
+ src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
+ src_width[1] = src_width[0] = width;
+ src_height[1] = src_height[0] = height;
+ src_pitch[1] = src_pitch[0] = video_pitch2;
+ src_width[4] = src_width[5] = src_width[2] = src_width[3] =
+ width / 2;
+ src_height[4] = src_height[5] = src_height[2] = src_height[3] =
+ height / 2;
+ src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
+ video_pitch;
+ n_src_surf = 6;
+ } else {
+ /* Packed: a single surface; only entry 0 of the per-surface arrays is filled in and used. */
+ if (id == FOURCC_UYVY)
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
+ else
+ src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+ src_width[0] = width;
+ src_height[0] = height;
+ src_pitch[0] = video_pitch;
+ n_src_surf = 1;
+ }
+
+ /*
+ * One bo holds (n_src_surf + 1) surface states, each in a slot of
+ * ALIGN(sizeof(struct brw_surface_state), 32) bytes, plus one uint32_t
+ * binding table entry per surface.
+ */
+ surface_state_binding_table_bo =
+ drm_intel_bo_alloc(intel->bufmgr,
+ "surface state & binding table",
+ (n_src_surf + 1) * (ALIGN(sizeof(struct brw_surface_state), 32) + sizeof(uint32_t)),
+ 4096);
+
+ if (!surface_state_binding_table_bo)
+ return;
+
+ /* Slot 0 is the destination surface; source surface i goes in slot i + 1. */
+ i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0);
+
+ for (src_surf = 0; src_surf < n_src_surf; src_surf++) {
+ i965_create_src_surface_state(scrn,
+ adaptor_priv->buf,
+ src_surf_base[src_surf],
+ src_width[src_surf],
+ src_height[src_surf],
+ src_pitch[src_surf],
+ src_surf_format,
+ surface_state_binding_table_bo,
+ (src_surf + 1) * ALIGN(sizeof(struct brw_surface_state), 32));
+ }
+
+ i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1);
+
+ /* The shared state objects are created on first use; give up if any is missing. */
+ if (!gen6_create_vidoe_objects(scrn)) {
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
+ return;
+ }
+
+ /* Set up the offset for translating from the given region (in screen
+ * coordinates) to the backing pixmap.
+ */
+#ifdef COMPOSITE
+ pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+ pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+ pix_xoff = 0;
+ pix_yoff = 0;
+#endif
+
+ dxo = dstRegion->extents.x1;
+ dyo = dstRegion->extents.y1;
+
+ /* Use normalized texture coordinates */
+ src_scale_x = ((float)src_w / width) / (float)drw_w;
+ src_scale_y = ((float)src_h / height) / (float)drw_h;
+
+ pbox = REGION_RECTS(dstRegion);
+ nbox = REGION_NUM_RECTS(dstRegion);
+ while (nbox--) {
+ int box_x1 = pbox->x1;
+ int box_y1 = pbox->y1;
+ int box_x2 = pbox->x2;
+ int box_y2 = pbox->y2;
+ int i;
+ drm_intel_bo *vb_bo;
+ float *vb;
+ /* Every bo this draw references, for the aperture space check below. */
+ drm_intel_bo *bo_table[] = {
+ NULL, /* vb_bo */
+ intel->batch_bo,
+ surface_state_binding_table_bo,
+ intel->video.gen4_sampler_bo,
+ intel->video.wm_prog_packed_bo,
+ intel->video.wm_prog_planar_bo,
+ intel->video.gen4_cc_vp_bo,
+ intel->video.gen4_cc_bo,
+ intel->video.gen6_blend_bo,
+ intel->video.gen6_depth_stencil_bo,
+ };
+
+ pbox++;
+
+ /* A fresh vertex buffer per box; on failure the remaining boxes are skipped. */
+ if (intel_alloc_and_map(intel, "textured video vb", 4096,
+ &vb_bo, &vb) != 0)
+ break;
+ bo_table[0] = vb_bo;
+
+ /*
+ * Three vertices of four floats each: scaled source coordinates
+ * first, then destination pixel coordinates.  Order: (x2, y2),
+ * (x1, y2), (x1, y1).
+ */
+ i = 0;
+ vb[i++] = (box_x2 - dxo) * src_scale_x;
+ vb[i++] = (box_y2 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x2 + pix_xoff;
+ vb[i++] = (float)box_y2 + pix_yoff;
+
+ vb[i++] = (box_x1 - dxo) * src_scale_x;
+ vb[i++] = (box_y2 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x1 + pix_xoff;
+ vb[i++] = (float)box_y2 + pix_yoff;
+
+ vb[i++] = (box_x1 - dxo) * src_scale_x;
+ vb[i++] = (box_y1 - dyo) * src_scale_y;
+ vb[i++] = (float)box_x1 + pix_xoff;
+ vb[i++] = (float)box_y1 + pix_yoff;
+
+ drm_intel_bo_unmap(vb_bo);
+
+ /* If this command won't fit in the current batch, flush.
+ * Assume that it does after being flushed.
+ */
+ if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0)
+ intel_batch_submit(scrn, FALSE);
+
+ /* 200 is the dword budget requested for the setup plus the draw emitted below. */
+ intel_batch_start_atomic(scrn, 200);
+ gen6_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf);
+
+ /* Set up the pointer to our vertex buffer */
+ OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2));
+ /* four 32-bit floats per vertex */
+ OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+ GEN6_VB0_VERTEXDATA |
+ ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
+ /* start of the buffer, then a second relocation i * 4 bytes in (i floats were written) */
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+ OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, i * 4);
+ OUT_BATCH(0); /* reserved */
+
+ OUT_BATCH(BRW_3DPRIMITIVE |
+ BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) | /* Internal Vertex Count */
+ (6 - 2));
+ OUT_BATCH(3); /* vertex count per instance */
+ OUT_BATCH(0); /* start vertex offset */
+ OUT_BATCH(1); /* single instance */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(0); /* index buffer offset, ignored */
+
+ intel_batch_end_atomic(scrn);
+ /* Drop our reference to the vertex buffer now that the draw is emitted. */
+ drm_intel_bo_unreference(vb_bo);
+ }
+
+ /* release reference once we're finished */
+ drm_intel_bo_unreference(surface_state_binding_table_bo);
+ intel_debug_flush(scrn);
}
diff --git a/src/intel.h b/src/intel.h
index 6b05997..c748a41 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -369,6 +369,10 @@ typedef struct intel_screen_private {
drm_intel_bo *gen4_cc_vp_bo;
drm_intel_bo *gen4_sampler_bo;
drm_intel_bo *gen4_sip_kernel_bo;
+ drm_intel_bo *wm_prog_packed_bo;
+ drm_intel_bo *wm_prog_planar_bo;
+ drm_intel_bo *gen6_blend_bo;
+ drm_intel_bo *gen6_depth_stencil_bo;
} video;
/* Render accel state */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index e7ca69d..01cb193 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -38,6 +38,7 @@
#include "intel.h"
#include "i830_reg.h"
#include "i915_drm.h"
+#include "i965_reg.h"
#define DUMP_BATCHBUFFERS NULL /* "/tmp/i915-batchbuffers.dump" */
@@ -146,14 +147,26 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn)
assert (!intel->in_batch_atomic);
- /* Big hammer, look to the pipelined flushes in future. */
- flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
- if (INTEL_INFO(intel)->gen >= 40)
+ if ((INTEL_INFO(intel)->gen >= 60)) {
+ BEGIN_BATCH(4);
+ OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+ BRW_PIPE_CONTROL_WC_FLUSH |
+ BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ BRW_PIPE_CONTROL_NOWRITE);
+ OUT_BATCH(0); /* write address */
+ OUT_BATCH(0); /* write data */
+ ADVANCE_BATCH();
+ } else {
+ /* Big hammer, look to the pipelined flushes in future. */
+ flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
+ if (INTEL_INFO(intel)->gen >= 40)
flags = 0;
- BEGIN_BATCH(1);
- OUT_BATCH(MI_FLUSH | flags);
- ADVANCE_BATCH();
+ BEGIN_BATCH(1);
+ OUT_BATCH(MI_FLUSH | flags);
+ ADVANCE_BATCH();
+ }
intel_batch_do_flush(scrn);
}
diff --git a/src/intel_video.h b/src/intel_video.h
index 5920d30..f405d40 100644
--- a/src/intel_video.h
+++ b/src/intel_video.h
@@ -81,6 +81,13 @@ void I965DisplayVideoTextured(ScrnInfoPtr scrn,
short src_w, short src_h,
short drw_w, short drw_h, PixmapPtr pixmap);
+/*
+ * Sandybridge (GEN6) textured video entry point; takes the same parameter
+ * list as I965DisplayVideoTextured().
+ */
+void Gen6DisplayVideoTextured(ScrnInfoPtr scrn,
+ intel_adaptor_private *adaptor_priv,
+ int id, RegionPtr dstRegion, short width,
+ short height, int video_pitch, int video_pitch2,
+ short src_w, short src_h,
+ short drw_w, short drw_h, PixmapPtr pixmap);
+
void i965_free_video(ScrnInfoPtr scrn);
int is_planar_fourcc(int id);
--
1.7.0.4
More information about the Intel-gfx
mailing list