[PATCH 7/7] drm/radeon/kms: skip db/cb/streamout checking when possible on r600
Marek Olšák
maraeo at gmail.com
Sun Mar 18 19:09:38 PDT 2012
Signed-off-by: Marek Olšák <maraeo at gmail.com>
---
drivers/gpu/drm/radeon/r600_cs.c | 270 +++++++++++++++++++++-----------------
1 files changed, 150 insertions(+), 120 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index d9ebec3..0ec3f20 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -52,18 +52,18 @@ struct r600_cs_track {
struct radeon_bo *cb_color_bo[8];
u64 cb_color_bo_mc[8];
u32 cb_color_bo_offset[8];
- struct radeon_bo *cb_color_frag_bo[8];
- struct radeon_bo *cb_color_tile_bo[8];
+ struct radeon_bo *cb_color_frag_bo[8]; /* unused */
+ struct radeon_bo *cb_color_tile_bo[8]; /* unused */
u32 cb_color_info[8];
u32 cb_color_view[8];
- u32 cb_color_size_idx[8];
+ u32 cb_color_size_idx[8]; /* unused */
u32 cb_target_mask;
- u32 cb_shader_mask;
+ u32 cb_shader_mask; /* unused */
u32 cb_color_size[8];
u32 vgt_strmout_en;
u32 vgt_strmout_buffer_en;
struct radeon_bo *vgt_strmout_bo[4];
- u64 vgt_strmout_bo_mc[4];
+ u64 vgt_strmout_bo_mc[4]; /* unused */
u32 vgt_strmout_bo_offset[4];
u32 vgt_strmout_size[4];
u32 db_depth_control;
@@ -75,6 +75,9 @@ struct r600_cs_track {
struct radeon_bo *db_bo;
u64 db_bo_mc;
bool sx_misc_kill_all_prims;
+ bool cb_dirty;
+ bool db_dirty;
+ bool streamout_dirty;
};
#define FMT_8_BIT(fmt, vc) [fmt] = { 1, 1, 1, vc, CHIP_R600 }
@@ -308,6 +311,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
}
track->cb_target_mask = 0xFFFFFFFF;
track->cb_shader_mask = 0xFFFFFFFF;
+ track->cb_dirty = true;
track->db_bo = NULL;
track->db_bo_mc = 0xFFFFFFFF;
/* assume the biggest format and that htile is enabled */
@@ -316,6 +320,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
track->db_depth_size = 0xFFFFFFFF;
track->db_depth_size_idx = 0;
track->db_depth_control = 0xFFFFFFFF;
+ track->db_dirty = true;
for (i = 0; i < 4; i++) {
track->vgt_strmout_size[i] = 0;
@@ -323,6 +328,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
}
+ track->streamout_dirty = true;
track->sx_misc_kill_all_prims = false;
}
@@ -461,7 +467,7 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
return 0;
/* check streamout */
- if (track->vgt_strmout_en) {
+ if (track->streamout_dirty && track->vgt_strmout_en) {
for (i = 0; i < 4; i++) {
if (track->vgt_strmout_buffer_en & (1 << i)) {
if (track->vgt_strmout_bo[i]) {
@@ -479,6 +485,7 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
}
}
}
+ track->streamout_dirty = false;
}
if (track->sx_misc_kill_all_prims)
@@ -487,135 +494,142 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
/* check that we have a cb for each enabled target, we don't check
* shader_mask because it seems mesa isn't always setting it :(
*/
- tmp = track->cb_target_mask;
- for (i = 0; i < 8; i++) {
- if ((tmp >> (i * 4)) & 0xF) {
- /* at least one component is enabled */
- if (track->cb_color_bo[i] == NULL) {
- dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
- __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
- return -EINVAL;
+ if (track->cb_dirty) {
+ tmp = track->cb_target_mask;
+ for (i = 0; i < 8; i++) {
+ if ((tmp >> (i * 4)) & 0xF) {
+ /* at least one component is enabled */
+ if (track->cb_color_bo[i] == NULL) {
+ dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
+ __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
+ return -EINVAL;
+ }
+ /* perform rewrite of CB_COLOR[0-7]_SIZE */
+ r = r600_cs_track_validate_cb(p, i);
+ if (r)
+ return r;
}
- /* perform rewrite of CB_COLOR[0-7]_SIZE */
- r = r600_cs_track_validate_cb(p, i);
- if (r)
- return r;
}
+ track->cb_dirty = false;
}
- /* Check depth buffer */
- if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
- G_028800_Z_ENABLE(track->db_depth_control)) {
- u32 nviews, bpe, ntiles, size, slice_tile_max;
- u32 height, height_align, pitch, pitch_align, depth_align;
- u64 base_offset, base_align;
- struct array_mode_checker array_check;
- int array_mode;
-
- if (track->db_bo == NULL) {
- dev_warn(p->dev, "z/stencil with no depth buffer\n");
- return -EINVAL;
- }
- if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
- dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n");
- return -EINVAL;
- }
- switch (G_028010_FORMAT(track->db_depth_info)) {
- case V_028010_DEPTH_16:
- bpe = 2;
- break;
- case V_028010_DEPTH_X8_24:
- case V_028010_DEPTH_8_24:
- case V_028010_DEPTH_X8_24_FLOAT:
- case V_028010_DEPTH_8_24_FLOAT:
- case V_028010_DEPTH_32_FLOAT:
- bpe = 4;
- break;
- case V_028010_DEPTH_X24_8_32_FLOAT:
- bpe = 8;
- break;
- default:
- dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
- return -EINVAL;
- }
- if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
- if (!track->db_depth_size_idx) {
- dev_warn(p->dev, "z/stencil buffer size not set\n");
- return -EINVAL;
- }
- tmp = radeon_bo_size(track->db_bo) - track->db_offset;
- tmp = (tmp / bpe) >> 6;
- if (!tmp) {
- dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
- track->db_depth_size, bpe, track->db_offset,
- radeon_bo_size(track->db_bo));
+
+ if (track->db_dirty) {
+ /* Check depth buffer */
+ if (G_028800_STENCIL_ENABLE(track->db_depth_control) ||
+ G_028800_Z_ENABLE(track->db_depth_control)) {
+ u32 nviews, bpe, ntiles, size, slice_tile_max;
+ u32 height, height_align, pitch, pitch_align, depth_align;
+ u64 base_offset, base_align;
+ struct array_mode_checker array_check;
+ int array_mode;
+
+ if (track->db_bo == NULL) {
+ dev_warn(p->dev, "z/stencil with no depth buffer\n");
return -EINVAL;
}
- ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
- } else {
- size = radeon_bo_size(track->db_bo);
- /* pitch in pixels */
- pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
- slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
- slice_tile_max *= 64;
- height = slice_tile_max / pitch;
- if (height > 8192)
- height = 8192;
- base_offset = track->db_bo_mc + track->db_offset;
- array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
- array_check.array_mode = array_mode;
- array_check.group_size = track->group_size;
- array_check.nbanks = track->nbanks;
- array_check.npipes = track->npipes;
- array_check.nsamples = track->nsamples;
- array_check.blocksize = bpe;
- if (r600_get_array_mode_alignment(&array_check,
- &pitch_align, &height_align, &depth_align, &base_align)) {
- dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
- G_028010_ARRAY_MODE(track->db_depth_info),
- track->db_depth_info);
+ if (G_028010_TILE_SURFACE_ENABLE(track->db_depth_info)) {
+ dev_warn(p->dev, "this kernel doesn't support z/stencil htile\n");
return -EINVAL;
}
- switch (array_mode) {
- case V_028010_ARRAY_1D_TILED_THIN1:
- /* don't break userspace */
- height &= ~0x7;
+ switch (G_028010_FORMAT(track->db_depth_info)) {
+ case V_028010_DEPTH_16:
+ bpe = 2;
+ break;
+ case V_028010_DEPTH_X8_24:
+ case V_028010_DEPTH_8_24:
+ case V_028010_DEPTH_X8_24_FLOAT:
+ case V_028010_DEPTH_8_24_FLOAT:
+ case V_028010_DEPTH_32_FLOAT:
+ bpe = 4;
break;
- case V_028010_ARRAY_2D_TILED_THIN1:
+ case V_028010_DEPTH_X24_8_32_FLOAT:
+ bpe = 8;
break;
default:
- dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
- G_028010_ARRAY_MODE(track->db_depth_info),
- track->db_depth_info);
+ dev_warn(p->dev, "z/stencil with invalid format %d\n", G_028010_FORMAT(track->db_depth_info));
return -EINVAL;
}
+ if ((track->db_depth_size & 0xFFFFFC00) == 0xFFFFFC00) {
+ if (!track->db_depth_size_idx) {
+ dev_warn(p->dev, "z/stencil buffer size not set\n");
+ return -EINVAL;
+ }
+ tmp = radeon_bo_size(track->db_bo) - track->db_offset;
+ tmp = (tmp / bpe) >> 6;
+ if (!tmp) {
+ dev_warn(p->dev, "z/stencil buffer too small (0x%08X %d %d %ld)\n",
+ track->db_depth_size, bpe, track->db_offset,
+ radeon_bo_size(track->db_bo));
+ return -EINVAL;
+ }
+ ib[track->db_depth_size_idx] = S_028000_SLICE_TILE_MAX(tmp - 1) | (track->db_depth_size & 0x3FF);
+ } else {
+ size = radeon_bo_size(track->db_bo);
+ /* pitch in pixels */
+ pitch = (G_028000_PITCH_TILE_MAX(track->db_depth_size) + 1) * 8;
+ slice_tile_max = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
+ slice_tile_max *= 64;
+ height = slice_tile_max / pitch;
+ if (height > 8192)
+ height = 8192;
+ base_offset = track->db_bo_mc + track->db_offset;
+ array_mode = G_028010_ARRAY_MODE(track->db_depth_info);
+ array_check.array_mode = array_mode;
+ array_check.group_size = track->group_size;
+ array_check.nbanks = track->nbanks;
+ array_check.npipes = track->npipes;
+ array_check.nsamples = track->nsamples;
+ array_check.blocksize = bpe;
+ if (r600_get_array_mode_alignment(&array_check,
+ &pitch_align, &height_align, &depth_align, &base_align)) {
+ dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+ G_028010_ARRAY_MODE(track->db_depth_info),
+ track->db_depth_info);
+ return -EINVAL;
+ }
+ switch (array_mode) {
+ case V_028010_ARRAY_1D_TILED_THIN1:
+ /* don't break userspace */
+ height &= ~0x7;
+ break;
+ case V_028010_ARRAY_2D_TILED_THIN1:
+ break;
+ default:
+ dev_warn(p->dev, "%s invalid tiling %d (0x%08X)\n", __func__,
+ G_028010_ARRAY_MODE(track->db_depth_info),
+ track->db_depth_info);
+ return -EINVAL;
+ }
- if (!IS_ALIGNED(pitch, pitch_align)) {
- dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n",
- __func__, __LINE__, pitch, pitch_align, array_mode);
- return -EINVAL;
- }
- if (!IS_ALIGNED(height, height_align)) {
- dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n",
- __func__, __LINE__, height, height_align, array_mode);
- return -EINVAL;
- }
- if (!IS_ALIGNED(base_offset, base_align)) {
- dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i,
- base_offset, base_align, array_mode);
- return -EINVAL;
- }
+ if (!IS_ALIGNED(pitch, pitch_align)) {
+ dev_warn(p->dev, "%s:%d db pitch (%d, 0x%x, %d) invalid\n",
+ __func__, __LINE__, pitch, pitch_align, array_mode);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(height, height_align)) {
+ dev_warn(p->dev, "%s:%d db height (%d, 0x%x, %d) invalid\n",
+ __func__, __LINE__, height, height_align, array_mode);
+ return -EINVAL;
+ }
+ if (!IS_ALIGNED(base_offset, base_align)) {
+ dev_warn(p->dev, "%s offset[%d] 0x%llx, 0x%llx, %d not aligned\n", __func__, i,
+ base_offset, base_align, array_mode);
+ return -EINVAL;
+ }
- ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
- nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
- tmp = ntiles * bpe * 64 * nviews;
- if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
- dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n",
- array_mode,
- track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
- radeon_bo_size(track->db_bo));
- return -EINVAL;
+ ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1;
+ nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1;
+ tmp = ntiles * bpe * 64 * nviews;
+ if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) {
+ dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n",
+ array_mode,
+ track->db_depth_size, ntiles, nviews, bpe, tmp + track->db_offset,
+ radeon_bo_size(track->db_bo));
+ return -EINVAL;
+ }
}
}
+ track->db_dirty = false;
}
return 0;
}
@@ -988,6 +1002,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case R_028800_DB_DEPTH_CONTROL:
track->db_depth_control = radeon_get_ib_value(p, idx);
+ track->db_dirty = true;
break;
case R_028010_DB_DEPTH_INFO:
if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
@@ -1008,21 +1023,27 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_1D_TILED_THIN1);
}
- } else
+ } else {
track->db_depth_info = radeon_get_ib_value(p, idx);
+ }
+ track->db_dirty = true;
break;
case R_028004_DB_DEPTH_VIEW:
track->db_depth_view = radeon_get_ib_value(p, idx);
+ track->db_dirty = true;
break;
case R_028000_DB_DEPTH_SIZE:
track->db_depth_size = radeon_get_ib_value(p, idx);
track->db_depth_size_idx = idx;
+ track->db_dirty = true;
break;
case R_028AB0_VGT_STRMOUT_EN:
track->vgt_strmout_en = radeon_get_ib_value(p, idx);
+ track->streamout_dirty = true;
break;
case R_028B20_VGT_STRMOUT_BUFFER_EN:
track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
+ track->streamout_dirty = true;
break;
case VGT_STRMOUT_BUFFER_BASE_0:
case VGT_STRMOUT_BUFFER_BASE_1:
@@ -1039,6 +1060,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
track->vgt_strmout_bo[tmp] = reloc->robj;
track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
+ track->streamout_dirty = true;
break;
case VGT_STRMOUT_BUFFER_SIZE_0:
case VGT_STRMOUT_BUFFER_SIZE_1:
@@ -1047,6 +1069,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
/* size in register is DWs, convert to bytes */
track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
+ track->streamout_dirty = true;
break;
case CP_COHER_BASE:
r = r600_cs_packet_next_reloc(p, &reloc);
@@ -1059,6 +1082,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case R_028238_CB_TARGET_MASK:
track->cb_target_mask = radeon_get_ib_value(p, idx);
+ track->cb_dirty = true;
break;
case R_02823C_CB_SHADER_MASK:
track->cb_shader_mask = radeon_get_ib_value(p, idx);
@@ -1066,6 +1090,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case R_028C04_PA_SC_AA_CONFIG:
tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx));
track->nsamples = 1 << tmp;
+ track->cb_dirty = true;
break;
case R_0280A0_CB_COLOR0_INFO:
case R_0280A4_CB_COLOR1_INFO:
@@ -1095,6 +1120,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
}
+ track->cb_dirty = true;
break;
case R_028080_CB_COLOR0_VIEW:
case R_028084_CB_COLOR1_VIEW:
@@ -1106,6 +1132,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case R_02809C_CB_COLOR7_VIEW:
tmp = (reg - R_028080_CB_COLOR0_VIEW) / 4;
track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
+ track->cb_dirty = true;
break;
case R_028060_CB_COLOR0_SIZE:
case R_028064_CB_COLOR1_SIZE:
@@ -1118,6 +1145,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
tmp = (reg - R_028060_CB_COLOR0_SIZE) / 4;
track->cb_color_size[tmp] = radeon_get_ib_value(p, idx);
track->cb_color_size_idx[tmp] = idx;
+ track->cb_dirty = true;
break;
/* This register were added late, there is userspace
* which does provide relocation for those but set
@@ -1200,6 +1228,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
track->cb_color_base_last[tmp] = ib[idx];
track->cb_color_bo[tmp] = reloc->robj;
track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset;
+ track->cb_dirty = true;
break;
case DB_DEPTH_BASE:
r = r600_cs_packet_next_reloc(p, &reloc);
@@ -1212,6 +1241,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
track->db_bo = reloc->robj;
track->db_bo_mc = reloc->lobj.gpu_offset;
+ track->db_dirty = true;
break;
case DB_HTILE_DATA_BASE:
case SQ_PGM_START_FS:
--
1.7.5.4
More information about the dri-devel
mailing list