[Intel-gfx] [PATCH] Move i965 video wm and sampler state to BOs.

Eric Anholt eric at anholt.net
Fri Dec 5 00:23:37 CET 2008


---
 src/i830.h       |    3 +
 src/i965_video.c |  136 ++++++++++++++++++++++++++++++++++++------------------
 2 files changed, 94 insertions(+), 45 deletions(-)

diff --git a/src/i830.h b/src/i830.h
index 024c72b..1af750d 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -572,8 +572,11 @@ typedef struct _I830Rec {
    struct {
       drm_intel_bo *gen4_vs_bo;
       drm_intel_bo *gen4_sf_bo;
+      drm_intel_bo *gen4_wm_packed_bo;
+      drm_intel_bo *gen4_wm_planar_bo;
       drm_intel_bo *gen4_cc_bo;
       drm_intel_bo *gen4_cc_vp_bo;
+      drm_intel_bo *gen4_sampler_bo;
    } video;
 #endif
 
diff --git a/src/i965_video.c b/src/i965_video.c
index 32ff330..19f2f06 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -409,10 +409,19 @@ i965_set_src_surface_state(ScrnInfoPtr scrn,
     src_surf_state->ss3.pitch = src_pitch - 1;
 }
 
-static void
-i965_set_sampler_state(ScrnInfoPtr scrn,
-		       struct brw_sampler_state *sampler_state)
+static drm_intel_bo *
+i965_create_sampler_state(ScrnInfoPtr scrn)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *sampler_bo;
+    struct brw_sampler_state *sampler_state;
+
+    sampler_bo = drm_intel_bo_alloc(pI830->bufmgr,
+				    "textured video sampler state",
+				    4096, 4096);
+    drm_intel_bo_map(sampler_bo, TRUE);
+    sampler_state = sampler_bo->virtual;
+
     memset(sampler_state, 0, sizeof(struct brw_sampler_state));
 
     sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -420,6 +429,9 @@ i965_set_sampler_state(ScrnInfoPtr scrn,
     sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
     sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
     sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+
+    drm_intel_bo_unmap(sampler_bo);
+    return sampler_bo;
 }
 
 static drm_intel_bo *
@@ -446,6 +458,20 @@ i965_create_vs_state(ScrnInfoPtr scrn)
 }
 
 static drm_intel_bo *
+i965_create_program(ScrnInfoPtr scrn, const uint32_t *program,
+		    unsigned int program_size)
+{
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *prog_bo;
+
+    prog_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video program",
+				 program_size, 4096);
+    drm_intel_bo_subdata(prog_bo, 0, program_size, program);
+
+    return prog_bo;
+}
+
+static drm_intel_bo *
 i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
 {
     I830Ptr pI830 = I830PTR(scrn);
@@ -494,16 +520,37 @@ i965_create_sf_state(ScrnInfoPtr scrn, uint32_t sf_kernel_offset)
     return sf_bo;
 }
 
-static void
-i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
-		  uint32_t ps_kernel_offset,
-		  uint32_t sampler_offset, int n_src_surf)
+static drm_intel_bo *
+i965_create_wm_state(ScrnInfoPtr scrn, drm_intel_bo *sampler_bo, Bool is_packed)
 {
+    I830Ptr pI830 = I830PTR(scrn);
+    drm_intel_bo *wm_bo, *kernel_bo;
+    struct brw_wm_unit_state *wm_state;
+
+    if (is_packed) {
+	kernel_bo = i965_create_program(scrn, &ps_kernel_packed_static[0][0],
+					sizeof(ps_kernel_packed_static));
+    } else {
+	kernel_bo = i965_create_program(scrn, &ps_kernel_planar_static[0][0],
+					sizeof(ps_kernel_planar_static));
+    }
+
+    wm_bo = drm_intel_bo_alloc(pI830->bufmgr, "textured video wm state",
+			       4096, 4096);
+    drm_intel_bo_map(wm_bo, TRUE);
+    wm_state = wm_bo->virtual;
+
     memset(wm_state, 0, sizeof (*wm_state));
-    wm_state->thread0.kernel_start_pointer = ps_kernel_offset >> 6;
     wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+    wm_state->thread0.kernel_start_pointer =
+	intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0),
+			 kernel_bo, wm_state->thread0.grf_reg_count << 1,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
     wm_state->thread1.single_program_flow = 1; /* XXX */
-    wm_state->thread1.binding_table_entry_count = 1 + n_src_surf;
+    if (is_packed)
+	wm_state->thread1.binding_table_entry_count = 2;
+    else
+	wm_state->thread1.binding_table_entry_count = 7;
     /* Though we never use the scratch space in our WM kernel, it has to be
      * set, and the minimum allocation is 1024 bytes.
      */
@@ -515,13 +562,21 @@ i965_set_wm_state(ScrnInfoPtr scrn, struct brw_wm_unit_state *wm_state,
     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
     wm_state->wm4.stats_enable = 1;
-    wm_state->wm4.sampler_state_pointer = sampler_offset >> 5;
+    wm_state->wm4.sampler_state_pointer =
+	intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4),
+			 sampler_bo, 0,
+			 I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
     wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
     wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
     wm_state->wm5.thread_dispatch_enable = 1;
     wm_state->wm5.enable_16_pix = 1;
     wm_state->wm5.enable_8_pix = 0;
     wm_state->wm5.early_depth_test = 1;
+
+    drm_intel_bo_unreference(kernel_bo);
+
+    drm_intel_bo_unmap(wm_bo);
+    return wm_bo;
 }
 
 static drm_intel_bo *
@@ -603,9 +658,9 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     float src_scale_x, src_scale_y;
     uint32_t *binding_table;
     Bool first_output = TRUE;
-    int dest_surf_offset, src_surf_offset[6], sampler_offset[6];
-    int wm_offset, vb_offset;
-    int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+    int dest_surf_offset, src_surf_offset[6];
+    int vb_offset;
+    int sf_kernel_offset, sip_kernel_offset;
     int binding_table_offset;
     int next_offset, total_state_size;
     int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
@@ -619,8 +674,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     int		src_height[6];
     int		src_pitch[6];
     int wm_binding_table_entries;
-    const uint32_t	*ps_kernel_static;
-    int		ps_kernel_static_size;
 
 #if 0
     ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, height,
@@ -650,16 +703,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     case FOURCC_UYVY:
 	src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY;
 	n_src_surf = 1;
-	ps_kernel_static = &ps_kernel_packed_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_packed_static);
 	src_width[0] = width;
 	src_height[0] = height;
 	src_pitch[0] = video_pitch;
 	break;
     case FOURCC_YUY2:
 	src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL;
-	ps_kernel_static = &ps_kernel_packed_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_packed_static);
 	src_width[0] = width;
 	src_height[0] = height;
 	src_pitch[0] = video_pitch;
@@ -671,8 +720,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     case FOURCC_I420:
     case FOURCC_YV12:
 	src_surf_format = BRW_SURFACEFORMAT_R8_UNORM;
-	ps_kernel_static = &ps_kernel_planar_static[0][0];
-	ps_kernel_static_size = sizeof (ps_kernel_planar_static);
 	src_width[1] = src_width[0] = width;
 	src_height[1] = src_height[0] = height;
 	src_pitch[1] = src_pitch[0] = video_pitch * 2;
@@ -692,21 +739,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     next_offset = 0;
 
     /* Set up our layout of state in framebuffer.  First the general state: */
-    wm_offset = ALIGN(next_offset, 32);
-    next_offset = wm_offset + sizeof(struct brw_wm_unit_state);
-
     sf_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
-    ps_kernel_offset = ALIGN(next_offset, 64);
-    next_offset = ps_kernel_offset + ps_kernel_static_size;
     sip_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
 
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++) {    
-	sampler_offset[src_surf] = ALIGN(next_offset, 32);
-	next_offset = sampler_offset[src_surf] + sizeof(struct brw_sampler_state);
-    }
-    
     /* Align VB to native size of elements, for safety */
     vb_offset = ALIGN(next_offset, 8);
     next_offset = vb_offset + vb_size;
@@ -738,11 +775,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
     binding_table = (void *)(state_base + binding_table_offset);
 
 #if 0
-    ErrorF("wm:            0x%08x\n", state_base_offset + wm_offset);
     ErrorF("sf kernel:     0x%08x\n", state_base_offset + sf_kernel_offset);
-    ErrorF("ps kernel:     0x%08x\n", state_base_offset + ps_kernel_offset);
     ErrorF("sip kernel:    0x%08x\n", state_base_offset + sip_kernel_offset);
-    ErrorF("src sampler:   0x%08x\n", state_base_offset + sampler_offset);
     ErrorF("vb:            0x%08x\n", state_base_offset + vb_offset);
     ErrorF("dst surf:      0x%08x\n", state_base_offset + dest_surf_offset);
     ErrorF("src surf:      0x%08x\n", state_base_offset + src_surf_offset);
@@ -770,8 +804,6 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 	   sizeof(sip_kernel_static));
     memcpy(state_base + sf_kernel_offset, sf_kernel_static,
 	   sizeof(sf_kernel_static));
-    memcpy(state_base + ps_kernel_offset, ps_kernel_static,
-	   ps_kernel_static_size);
 
     i965_set_dst_surface_state(pScrn, (void *)(state_base +
 					       dest_surf_offset),
@@ -787,25 +819,28 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
 				   src_pitch[src_surf],
 				   src_surf_format);
 
-    for (src_surf = 0; src_surf < n_src_surf; src_surf++)
-	i965_set_sampler_state(pScrn, (void *)(state_base +
-					       sampler_offset[src_surf]));
-
     /* Set up a binding table for our surfaces.  Only the PS will use it */
     binding_table[0] = state_base_offset + dest_surf_offset;
     for (src_surf = 0; src_surf < n_src_surf; src_surf++)
 	binding_table[1 + src_surf] = state_base_offset + src_surf_offset[src_surf];
 
+    if (pI830->video.gen4_sampler_bo == NULL)
+	pI830->video.gen4_sampler_bo = i965_create_sampler_state(pScrn);
+
     if (pI830->video.gen4_vs_bo == NULL)
 	pI830->video.gen4_vs_bo = i965_create_vs_state(pScrn);
     if (pI830->video.gen4_sf_bo == NULL)
 	pI830->video.gen4_sf_bo = i965_create_sf_state(pScrn,
 						       state_base_offset +
 						       sf_kernel_offset);
-    i965_set_wm_state(pScrn, (void *)(state_base + wm_offset),
-		      state_base_offset + ps_kernel_offset,
-		      state_base_offset + sampler_offset[0],
-		      n_src_surf);
+    if (pI830->video.gen4_wm_packed_bo == NULL) {
+	pI830->video.gen4_wm_packed_bo =
+	    i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, TRUE);
+    }
+    if (pI830->video.gen4_wm_planar_bo == NULL) {
+	pI830->video.gen4_wm_planar_bo =
+	    i965_create_wm_state(pScrn, pI830->video.gen4_sampler_bo, FALSE);
+    }
 
     if (pI830->video.gen4_cc_bo == NULL)
 	pI830->video.gen4_cc_bo = i965_create_cc_state(pScrn);
@@ -908,7 +943,12 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id,
        /* disable CLIP, resulting in passthrough */
        OUT_BATCH(BRW_CLIP_DISABLE);
        OUT_RELOC(pI830->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-       OUT_BATCH(state_base_offset + wm_offset);  /* 32 byte aligned */
+       if (n_src_surf == 1)
+	   OUT_RELOC(pI830->video.gen4_wm_packed_bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+       else
+	   OUT_RELOC(pI830->video.gen4_wm_planar_bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_RELOC(pI830->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
 
        /* URB fence */
@@ -1057,6 +1097,12 @@ i965_free_video(ScrnInfoPtr scrn)
     pI830->video.gen4_sf_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_bo);
     pI830->video.gen4_cc_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_wm_packed_bo);
+    pI830->video.gen4_wm_packed_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_wm_planar_bo);
+    pI830->video.gen4_wm_planar_bo = NULL;
     drm_intel_bo_unreference(pI830->video.gen4_cc_vp_bo);
     pI830->video.gen4_cc_vp_bo = NULL;
+    drm_intel_bo_unreference(pI830->video.gen4_sampler_bo);
+    pI830->video.gen4_sampler_bo = NULL;
 }
-- 
1.5.6.5




More information about the Intel-gfx mailing list