[Mesa-dev] [PATCH] radeonsi: move caps, vendor/device name, and disk shader cache to radeonsi folder

Tue Nov 28 14:45:22 UTC 2017

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.c   | 431 -----------------------
 src/gallium/drivers/radeon/r600_pipe_common.h   |   4 -
 src/gallium/drivers/radeonsi/si_pipe.c          | 432 ++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h          |   4 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  14 +-
 5 files changed, 443 insertions(+), 442 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ce612113c51..036f380b0b3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -29,21 +29,20 @@
 #include "util/u_memory.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_upload_mgr.h"
 #include "util/os_time.h"
 #include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 #include "radeon/radeon_video.h"
 #include "amd/common/ac_llvm_util.h"
 #include "amd/common/sid.h"
 #include <inttypes.h>
-#include <sys/utsname.h>
 
 #include <llvm-c/TargetMachine.h>
 
 
 /*
  * shader binary helpers.
  */
 void si_radeon_shader_binary_init(struct ac_shader_binary *b)
 {
 	memset(b, 0, sizeof(*b));
@@ -632,139 +631,20 @@ static const struct debug_named_value common_debug_options[] = {
 	{ "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
 	{ "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
 	{ "dpbb", DBG(DPBB), "Enable DPBB." },
 	{ "dfsm", DBG(DFSM), "Enable DFSM." },
 	{ "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
 	{ "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
 
-static const char* r600_get_vendor(struct pipe_screen* pscreen)
-{
-	return "X.Org";
-}
-
-static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
-{
-	return "AMD";
-}
-
-static const char *r600_get_marketing_name(struct radeon_winsys *ws)
-{
-	if (!ws->get_chip_name)
-		return NULL;
-	return ws->get_chip_name(ws);
-}
-
-static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
-{
-	switch (rscreen->info.family) {
-	case CHIP_TAHITI: return "AMD TAHITI";
-	case CHIP_PITCAIRN: return "AMD PITCAIRN";
-	case CHIP_VERDE: return "AMD CAPE VERDE";
-	case CHIP_OLAND: return "AMD OLAND";
-	case CHIP_HAINAN: return "AMD HAINAN";
-	case CHIP_BONAIRE: return "AMD BONAIRE";
-	case CHIP_KAVERI: return "AMD KAVERI";
-	case CHIP_KABINI: return "AMD KABINI";
-	case CHIP_HAWAII: return "AMD HAWAII";
-	case CHIP_MULLINS: return "AMD MULLINS";
-	case CHIP_TONGA: return "AMD TONGA";
-	case CHIP_ICELAND: return "AMD ICELAND";
-	case CHIP_CARRIZO: return "AMD CARRIZO";
-	case CHIP_FIJI: return "AMD FIJI";
-	case CHIP_POLARIS10: return "AMD POLARIS10";
-	case CHIP_POLARIS11: return "AMD POLARIS11";
-	case CHIP_POLARIS12: return "AMD POLARIS12";
-	case CHIP_STONEY: return "AMD STONEY";
-	case CHIP_VEGA10: return "AMD VEGA10";
-	case CHIP_RAVEN: return "AMD RAVEN";
-	default: return "AMD unknown";
-	}
-}
-
-static void r600_disk_cache_create(struct r600_common_screen *rscreen)
-{
-	/* Don't use the cache if shader dumping is enabled. */
-	if (rscreen->debug_flags & DBG_ALL_SHADERS)
-		return;
-
-	/* TODO: remove this once gallium supports a nir cache */
-	if (rscreen->debug_flags & DBG(NIR))
-		return;
-
-	uint32_t mesa_timestamp;
-	if (disk_cache_get_function_timestamp(r600_disk_cache_create,
-					      &mesa_timestamp)) {
-		char *timestamp_str;
-		int res = -1;
-		uint32_t llvm_timestamp;
-
-		if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
-						      &llvm_timestamp)) {
-			res = asprintf(&timestamp_str, "%u_%u",
-				       mesa_timestamp, llvm_timestamp);
-		}
-
-		if (res != -1) {
-			/* These flags affect shader compilation. */
-			uint64_t shader_debug_flags =
-				rscreen->debug_flags &
-				(DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
-				 DBG(SI_SCHED) |
-				 DBG(UNSAFE_MATH));
-
-			rscreen->disk_shader_cache =
-				disk_cache_create(r600_get_family_name(rscreen),
-						  timestamp_str,
-						  shader_debug_flags);
-			free(timestamp_str);
-		}
-	}
-}
-
-static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-	return rscreen->disk_shader_cache;
-}
-
-static const char* r600_get_name(struct pipe_screen* pscreen)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-
-	return rscreen->renderer_string;
-}
-
-static float r600_get_paramf(struct pipe_screen* pscreen,
-			     enum pipe_capf param)
-{
-	switch (param) {
-	case PIPE_CAPF_MAX_LINE_WIDTH:
-	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-	case PIPE_CAPF_MAX_POINT_WIDTH:
-	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-		return 8192.0f;
-	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-		return 16.0f;
-	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-		return 16.0f;
-	case PIPE_CAPF_GUARD_BAND_LEFT:
-	case PIPE_CAPF_GUARD_BAND_TOP:
-	case PIPE_CAPF_GUARD_BAND_RIGHT:
-	case PIPE_CAPF_GUARD_BAND_BOTTOM:
-		return 0.0f;
-	}
-	return 0.0f;
-}
-
 static int r600_get_video_param(struct pipe_screen *screen,
 				enum pipe_video_profile profile,
 				enum pipe_video_entrypoint entrypoint,
 				enum pipe_video_cap param)
 {
 	switch (param) {
 	case PIPE_VIDEO_CAP_SUPPORTED:
 		return vl_profile_supported(screen, profile, entrypoint);
 	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
 		return 1;
@@ -779,390 +659,79 @@ static int r600_get_video_param(struct pipe_screen *screen,
 		return false;
 	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
 		return true;
 	case PIPE_VIDEO_CAP_MAX_LEVEL:
 		return vl_level_supported(screen, profile);
 	default:
 		return 0;
 	}
 }
 
-static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
-					  enum pipe_shader_ir ir_type)
-{
-	if (ir_type != PIPE_SHADER_IR_TGSI)
-		return 256;
-
-	/* Only 16 waves per thread-group on gfx9. */
-	if (screen->chip_class >= GFX9)
-		return 1024;
-
-	/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
-	 * round number.
-	 */
-	return 2048;
-}
-
-static int r600_get_compute_param(struct pipe_screen *screen,
-        enum pipe_shader_ir ir_type,
-        enum pipe_compute_cap param,
-        void *ret)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
-
-	//TODO: select these params by asic
-	switch (param) {
-	case PIPE_COMPUTE_CAP_IR_TARGET: {
-		const char *gpu;
-		const char *triple;
-
-		if (HAVE_LLVM < 0x0400)
-			triple = "amdgcn--";
-		else
-			triple = "amdgcn-mesa-mesa3d";
-
-		gpu = ac_get_llvm_processor_name(rscreen->family);
-		if (ret) {
-			sprintf(ret, "%s-%s", gpu, triple);
-		}
-		/* +2 for dash and terminating NIL byte */
-		return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
-	}
-	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-		if (ret) {
-			uint64_t *grid_dimension = ret;
-			grid_dimension[0] = 3;
-		}
-		return 1 * sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-		if (ret) {
-			uint64_t *grid_size = ret;
-			grid_size[0] = 65535;
-			grid_size[1] = 65535;
-			grid_size[2] = 65535;
-		}
-		return 3 * sizeof(uint64_t) ;
-
-	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-		if (ret) {
-			uint64_t *block_size = ret;
-			unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
-			block_size[0] = threads_per_block;
-			block_size[1] = threads_per_block;
-			block_size[2] = threads_per_block;
-		}
-		return 3 * sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-		if (ret) {
-			uint64_t *max_threads_per_block = ret;
-			*max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
-		}
-		return sizeof(uint64_t);
-	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
-		if (ret) {
-			uint32_t *address_bits = ret;
-			address_bits[0] = 64;
-		}
-		return 1 * sizeof(uint32_t);
-
-	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
-		if (ret) {
-			uint64_t *max_global_size = ret;
-			uint64_t max_mem_alloc_size;
-
-			r600_get_compute_param(screen, ir_type,
-				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
-				&max_mem_alloc_size);
-
-			/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
-			 * 1/4 of the MAX_GLOBAL_SIZE.  Since the
-			 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
-			 * make sure we never report more than
-			 * 4 * MAX_MEM_ALLOC_SIZE.
-			 */
-			*max_global_size = MIN2(4 * max_mem_alloc_size,
-						MAX2(rscreen->info.gart_size,
-						     rscreen->info.vram_size));
-		}
-		return sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
-		if (ret) {
-			uint64_t *max_local_size = ret;
-			/* Value reported by the closed source driver. */
-			*max_local_size = 32768;
-		}
-		return sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-		if (ret) {
-			uint64_t *max_input_size = ret;
-			/* Value reported by the closed source driver. */
-			*max_input_size = 1024;
-		}
-		return sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-		if (ret) {
-			uint64_t *max_mem_alloc_size = ret;
-
-			*max_mem_alloc_size = rscreen->info.max_alloc_size;
-		}
-		return sizeof(uint64_t);
-
-	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
-		if (ret) {
-			uint32_t *max_clock_frequency = ret;
-			*max_clock_frequency = rscreen->info.max_shader_clock;
-		}
-		return sizeof(uint32_t);
-
-	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
-		if (ret) {
-			uint32_t *max_compute_units = ret;
-			*max_compute_units = rscreen->info.num_good_compute_units;
-		}
-		return sizeof(uint32_t);
-
-	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
-		if (ret) {
-			uint32_t *images_supported = ret;
-			*images_supported = 0;
-		}
-		return sizeof(uint32_t);
-	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
-		break; /* unused */
-	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
-		if (ret) {
-			uint32_t *subgroup_size = ret;
-			*subgroup_size = 64;
-		}
-		return sizeof(uint32_t);
-	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
-		if (ret) {
-			uint64_t *max_variable_threads_per_block = ret;
-			if (ir_type == PIPE_SHADER_IR_TGSI)
-				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-			else
-				*max_variable_threads_per_block = 0;
-		}
-		return sizeof(uint64_t);
-	}
-
-        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
-        return 0;
-}
-
-static uint64_t r600_get_timestamp(struct pipe_screen *screen)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-
-	return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
-			rscreen->info.clock_crystal_freq;
-}
-
-static void r600_query_memory_info(struct pipe_screen *screen,
-				   struct pipe_memory_info *info)
-{
-	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-	struct radeon_winsys *ws = rscreen->ws;
-	unsigned vram_usage, gtt_usage;
-
-	info->total_device_memory = rscreen->info.vram_size / 1024;
-	info->total_staging_memory = rscreen->info.gart_size / 1024;
-
-	/* The real TTM memory usage is somewhat random, because:
-	 *
-	 * 1) TTM delays freeing memory, because it can only free it after
-	 *    fences expire.
-	 *
-	 * 2) The memory usage can be really low if big VRAM evictions are
-	 *    taking place, but the real usage is well above the size of VRAM.
-	 *
-	 * Instead, return statistics of this process.
-	 */
-	vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
-	gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
-
-	info->avail_device_memory =
-		vram_usage <= info->total_device_memory ?
-				info->total_device_memory - vram_usage : 0;
-	info->avail_staging_memory =
-		gtt_usage <= info->total_staging_memory ?
-				info->total_staging_memory - gtt_usage : 0;
-
-	info->device_memory_evicted =
-		ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
-
-	if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
-		info->nr_device_memory_evictions =
-			ws->query_value(ws, RADEON_NUM_EVICTIONS);
-	else
-		/* Just return the number of evicted 64KB pages. */
-		info->nr_device_memory_evictions = info->device_memory_evicted / 64;
-}
-
 struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
 						const struct pipe_resource *templ)
 {
 	if (templ->target == PIPE_BUFFER) {
 		return si_buffer_create(screen, templ, 256);
 	} else {
 		return si_texture_create(screen, templ);
 	}
 }
 
 bool si_common_screen_init(struct r600_common_screen *rscreen,
 			   struct radeon_winsys *ws)
 {
-	char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
-	struct utsname uname_data;
-	const char *chip_name;
-
-	ws->query_info(ws, &rscreen->info);
-	rscreen->ws = ws;
-
-	if ((chip_name = r600_get_marketing_name(ws)))
-		snprintf(family_name, sizeof(family_name), "%s / ",
-			 r600_get_family_name(rscreen) + 4);
-	else
-		chip_name = r600_get_family_name(rscreen);
-
-	if (uname(&uname_data) == 0)
-		snprintf(kernel_version, sizeof(kernel_version),
-			 " / %s", uname_data.release);
-
-	if (HAVE_LLVM > 0) {
-		snprintf(llvm_string, sizeof(llvm_string),
-			 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
-			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
-	}
-
-	snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
-		 "%s (%sDRM %i.%i.%i%s%s)",
-		 chip_name, family_name, rscreen->info.drm_major,
-		 rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
-		 kernel_version, llvm_string);
-
-	rscreen->b.get_name = r600_get_name;
-	rscreen->b.get_vendor = r600_get_vendor;
-	rscreen->b.get_device_vendor = r600_get_device_vendor;
-	rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
-	rscreen->b.get_compute_param = r600_get_compute_param;
-	rscreen->b.get_paramf = r600_get_paramf;
-	rscreen->b.get_timestamp = r600_get_timestamp;
 	rscreen->b.resource_destroy = u_resource_destroy_vtbl;
 	rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
-	rscreen->b.query_memory_info = r600_query_memory_info;
 
 	if (rscreen->info.has_hw_decode) {
 		rscreen->b.get_video_param = si_vid_get_video_param;
 		rscreen->b.is_video_format_supported = si_vid_is_format_supported;
 	} else {
 		rscreen->b.get_video_param = r600_get_video_param;
 		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
 	}
 
 	si_init_screen_texture_functions(rscreen);
 	si_init_screen_query_functions(rscreen);
 
 	rscreen->family = rscreen->info.family;
 	rscreen->chip_class = rscreen->info.chip_class;
 	rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 	rscreen->has_rbplus = false;
 	rscreen->rbplus_allowed = false;
 
-	r600_disk_cache_create(rscreen);
-
 	slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
 
 	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
 	if (rscreen->force_aniso >= 0) {
 		printf("radeon: Forcing anisotropy filter to %ix\n",
 		       /* round down to a power of two */
 		       1 << util_logbase2(rscreen->force_aniso));
 	}
 
 	(void) mtx_init(&rscreen->aux_context_lock, mtx_plain);
 	(void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain);
 
-	if (rscreen->debug_flags & DBG(INFO)) {
-		printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
-		       rscreen->info.pci_domain, rscreen->info.pci_bus,
-		       rscreen->info.pci_dev, rscreen->info.pci_func);
-		printf("pci_id = 0x%x\n", rscreen->info.pci_id);
-		printf("family = %i (%s)\n", rscreen->info.family,
-		       r600_get_family_name(rscreen));
-		printf("chip_class = %i\n", rscreen->info.chip_class);
-		printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
-		printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
-		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
-		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
-		printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024));
-		printf("max_alloc_size = %i MB\n",
-		       (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024));
-		printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size);
-		printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram);
-		printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
-		printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
-		printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode);
-		printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings);
-		printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings);
-		printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version);
-		printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version);
-		printf("me_fw_version = %i\n", rscreen->info.me_fw_version);
-		printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature);
-		printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version);
-		printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature);
-		printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version);
-		printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature);
-		printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
-		printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
-		printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size);
-		printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
-		       rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
-		printf("has_userptr = %i\n", rscreen->info.has_userptr);
-		printf("has_syncobj = %u\n", rscreen->info.has_syncobj);
-		printf("has_sync_file = %u\n", rscreen->info.has_sync_file);
-
-		printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
-		printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
-		printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
-		printf("max_se = %i\n", rscreen->info.max_se);
-		printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
-
-		printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
-		printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
-		printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
-		printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
-		printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
-		printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
-		printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask);
-		printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment);
-	}
 	return true;
 }
 
 void si_destroy_common_screen(struct r600_common_screen *rscreen)
 {
 	si_perfcounters_destroy(rscreen);
 	si_gpu_load_kill_thread(rscreen);
 
 	mtx_destroy(&rscreen->gpu_load_mutex);
 	mtx_destroy(&rscreen->aux_context_lock);
 	rscreen->aux_context->destroy(rscreen->aux_context);
 
 	slab_destroy_parent(&rscreen->pool_transfers);
 
-	disk_cache_destroy(rscreen->disk_shader_cache);
 	rscreen->ws->destroy(rscreen->ws);
 	FREE(rscreen);
 }
 
 bool si_can_dump_shader(struct r600_common_screen *rscreen,
 			unsigned processor)
 {
 	return rscreen->debug_flags & (1 << processor);
 }
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index adfcc7c8a70..4b80d188fba 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -387,22 +387,20 @@ struct r600_memory_object {
 struct r600_common_screen {
 	struct pipe_screen		b;
 	struct radeon_winsys		*ws;
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 	struct radeon_info		info;
 	uint64_t			debug_flags;
 	bool				has_rbplus;     /* if RB+ registers exist */
 	bool				rbplus_allowed; /* if RB+ is allowed */
 
-	struct disk_cache		*disk_shader_cache;
-
 	struct slab_parent_pool		pool_transfers;
 
 	/* Texture filter settings. */
 	int				force_aniso; /* -1 = disabled */
 
 	/* Auxiliary context. Mainly used to initialize resources.
 	 * It must be locked prior to using and flushed before unlocking. */
 	struct pipe_context		*aux_context;
 	mtx_t				aux_context_lock;
 
@@ -415,22 +413,20 @@ struct r600_common_screen {
 	 */
 	unsigned			num_shaders_created;
 	unsigned			num_shader_cache_hits;
 
 	/* GPU load thread. */
 	mtx_t				gpu_load_mutex;
 	thrd_t				gpu_load_thread;
 	union r600_mmio_counters	mmio_counters;
 	volatile unsigned		gpu_load_stop_thread; /* bool */
 
-	char				renderer_string[100];
-
 	/* Performance counters. */
 	struct r600_perfcounters	*perfcounters;
 
 	/* If pipe_screen wants to recompute and re-emit the framebuffer,
 	 * sampler, and image states of all contexts, it should atomically
 	 * increment this.
 	 *
 	 * Each context will compare this with its own last known value of
 	 * the counter before drawing and re-emit the states accordingly.
 	 */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index b3d8ae508bd..b38c55619f7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -31,20 +31,22 @@
 #include "util/u_log.h"
 #include "util/u_memory.h"
 #include "util/u_suballoc.h"
 #include "util/u_tests.h"
 #include "util/xmlconfig.h"
 #include "vl/vl_decoder.h"
 #include "../ddebug/dd_util.h"
 
 #include "compiler/nir/nir.h"
 
+#include <sys/utsname.h>
+
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
 	struct si_context *sctx = (struct si_context *)context;
 	int i;
 
 	/* Unreference the framebuffer normally to disable related logic
 	 * properly.
@@ -394,20 +396,306 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
 	 * implementation for fence_server_sync is incomplete. */
 	return threaded_context_create(ctx, &sscreen->b.pool_transfers,
 				       si_replace_buffer_storage,
 				       sscreen->b.info.drm_major >= 3 ? si_create_fence : NULL,
 				       &((struct si_context*)ctx)->b.tc);
 }
 
 /*
  * pipe_screen
  */
+static const char* si_get_vendor(struct pipe_screen* pscreen)
+{
+	return "X.Org"; /* fixed string; pscreen is not consulted */
+}
+
+static const char* si_get_device_vendor(struct pipe_screen* pscreen)
+{
+	return "AMD"; /* fixed string; pscreen is not consulted */
+}
+
+/* Chip name as reported by the winsys, or NULL when the winsys does not
+ * provide a get_chip_name callback. */
+static const char *si_get_marketing_name(struct radeon_winsys *ws)
+{
+	return ws->get_chip_name ? ws->get_chip_name(ws) : NULL;
+}
+
+static const char *si_get_family_name(const struct si_screen *screen)
+{
+	switch (screen->b.info.family) { /* every name below starts with "AMD " */
+	case CHIP_TAHITI: return "AMD TAHITI";
+	case CHIP_PITCAIRN: return "AMD PITCAIRN";
+	case CHIP_VERDE: return "AMD CAPE VERDE";
+	case CHIP_OLAND: return "AMD OLAND";
+	case CHIP_HAINAN: return "AMD HAINAN";
+	case CHIP_BONAIRE: return "AMD BONAIRE";
+	case CHIP_KAVERI: return "AMD KAVERI";
+	case CHIP_KABINI: return "AMD KABINI";
+	case CHIP_HAWAII: return "AMD HAWAII";
+	case CHIP_MULLINS: return "AMD MULLINS";
+	case CHIP_TONGA: return "AMD TONGA";
+	case CHIP_ICELAND: return "AMD ICELAND";
+	case CHIP_CARRIZO: return "AMD CARRIZO";
+	case CHIP_FIJI: return "AMD FIJI";
+	case CHIP_POLARIS10: return "AMD POLARIS10";
+	case CHIP_POLARIS11: return "AMD POLARIS11";
+	case CHIP_POLARIS12: return "AMD POLARIS12";
+	case CHIP_STONEY: return "AMD STONEY";
+	case CHIP_VEGA10: return "AMD VEGA10";
+	case CHIP_RAVEN: return "AMD RAVEN";
+	default: return "AMD unknown"; /* any family not listed above */
+	}
+}
+
+static void si_disk_cache_create(struct si_screen *screen)
+{ /* May set screen->disk_shader_cache; every bail-out path leaves it untouched. */
+	/* Don't use the cache if shader dumping is enabled. */
+	if (screen->b.debug_flags & DBG_ALL_SHADERS)
+		return;
+
+	/* TODO: remove this once gallium supports a nir cache */
+	if (screen->b.debug_flags & DBG(NIR))
+		return;
+
+	uint32_t mesa_timestamp; /* NOTE(review): presumably identifies the build containing this function - confirm */
+	if (disk_cache_get_function_timestamp(si_disk_cache_create,
+					      &mesa_timestamp)) {
+		char *timestamp_str; /* only valid once asprintf() below has succeeded */
+		int res = -1; /* stays -1 if the LLVM timestamp lookup or asprintf() fails */
+		uint32_t llvm_timestamp;
+
+		if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
+						      &llvm_timestamp)) {
+			res = asprintf(&timestamp_str, "%u_%u",
+				       mesa_timestamp, llvm_timestamp);
+		}
+
+		if (res != -1) {
+			/* These flags affect shader compilation. */
+			uint64_t shader_debug_flags =
+				screen->b.debug_flags &
+				(DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
+				 DBG(SI_SCHED) |
+				 DBG(UNSAFE_MATH));
+
+			screen->disk_shader_cache =
+				disk_cache_create(si_get_family_name(screen),
+						  timestamp_str,
+						  shader_debug_flags);
+			free(timestamp_str); /* buffer was allocated by asprintf() */
+		}
+	}
+}
+
+static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
+{
+	struct si_screen *sscreen = (struct si_screen*)pscreen;
+	return sscreen->disk_shader_cache; /* returns the stored pointer as-is */
+}
+
+/* Returns the renderer string stored in the si_screen behind pscreen. */
+static const char* si_get_name(struct pipe_screen* pscreen)
+{
+	const struct si_screen *s = (struct si_screen*)pscreen;
+	return s->renderer_string;
+}
+
+/* Float capability query: fixed limits for the PIPE_CAPF_* values handled
+ * below, 0.0f for the guard-band caps and for anything unrecognized. */
+static float si_get_paramf(struct pipe_screen* pscreen, enum pipe_capf param)
+{
+	switch (param) {
+	case PIPE_CAPF_MAX_LINE_WIDTH:
+	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+	case PIPE_CAPF_MAX_POINT_WIDTH:
+	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+		return 8192.0f;
+	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+		return 16.0f;
+	case PIPE_CAPF_GUARD_BAND_LEFT:
+	case PIPE_CAPF_GUARD_BAND_TOP:
+	case PIPE_CAPF_GUARD_BAND_RIGHT:
+	case PIPE_CAPF_GUARD_BAND_BOTTOM:
+		break;
+	}
+	return 0.0f;
+}
+
+/* Largest compute thread-group size to report for the given shader IR. */
+static unsigned get_max_threads_per_block(struct si_screen *screen,
+					  enum pipe_shader_ir ir_type)
+{
+	const bool is_tgsi = ir_type == PIPE_SHADER_IR_TGSI;
+	const bool is_gfx9_plus = screen->b.chip_class >= GFX9;
+
+	if (!is_tgsi)
+		return 256;
+
+	/* gfx9 allows only 16 waves per thread-group; GCN < gfx9 allows up
+	 * to 40, for which a nice round number is exposed.
+	 */
+	return is_gfx9_plus ? 1024 : 2048;
+}
+
+static int si_get_compute_param(struct pipe_screen *screen,
+        enum pipe_shader_ir ir_type,
+        enum pipe_compute_cap param,
+        void *ret)
+{
+	struct si_screen *sscreen = (struct si_screen *)screen;
+
+	switch (param) { /* each case writes through ret only when it is non-NULL and returns the value's size in bytes */
+	case PIPE_COMPUTE_CAP_IR_TARGET: {
+		const char *gpu;
+		const char *triple;
+
+		if (HAVE_LLVM < 0x0400)
+			triple = "amdgcn--";
+		else
+			triple = "amdgcn-mesa-mesa3d";
+
+		gpu = ac_get_llvm_processor_name(sscreen->b.family);
+		if (ret) {
+			sprintf(ret, "%s-%s", gpu, triple); /* unbounded write: ret must hold the byte count returned below */
+		}
+		/* +2 for dash and terminating NIL byte */
+		return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
+	}
+	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+		if (ret) {
+			uint64_t *grid_dimension = ret;
+			grid_dimension[0] = 3;
+		}
+		return 1 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+		if (ret) {
+			uint64_t *grid_size = ret;
+			grid_size[0] = 65535;
+			grid_size[1] = 65535;
+			grid_size[2] = 65535;
+		}
+		return 3 * sizeof(uint64_t) ;
+
+	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+		if (ret) {
+			uint64_t *block_size = ret;
+			unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
+			block_size[0] = threads_per_block; /* the same limit is reported for all three dimensions */
+			block_size[1] = threads_per_block;
+			block_size[2] = threads_per_block;
+		}
+		return 3 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+		if (ret) {
+			uint64_t *max_threads_per_block = ret;
+			*max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
+		}
+		return sizeof(uint64_t);
+	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+		if (ret) {
+			uint32_t *address_bits = ret;
+			address_bits[0] = 64;
+		}
+		return 1 * sizeof(uint32_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+		if (ret) {
+			uint64_t *max_global_size = ret;
+			uint64_t max_mem_alloc_size;
+
+			si_get_compute_param(screen, ir_type, /* recursive query for the allocation limit */
+				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+				&max_mem_alloc_size);
+
+			/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
+			 * 1/4 of the MAX_GLOBAL_SIZE.  Since the
+			 * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
+			 * make sure we never report more than
+			 * 4 * MAX_MEM_ALLOC_SIZE.
+			 */
+			*max_global_size = MIN2(4 * max_mem_alloc_size,
+						MAX2(sscreen->b.info.gart_size,
+						     sscreen->b.info.vram_size));
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+		if (ret) {
+			uint64_t *max_local_size = ret;
+			/* Value reported by the closed source driver. */
+			*max_local_size = 32768;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+		if (ret) {
+			uint64_t *max_input_size = ret;
+			/* Value reported by the closed source driver. */
+			*max_input_size = 1024;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+		if (ret) {
+			uint64_t *max_mem_alloc_size = ret;
+
+			*max_mem_alloc_size = sscreen->b.info.max_alloc_size;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+		if (ret) {
+			uint32_t *max_clock_frequency = ret;
+			*max_clock_frequency = sscreen->b.info.max_shader_clock;
+		}
+		return sizeof(uint32_t);
+
+	case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+		if (ret) {
+			uint32_t *max_compute_units = ret;
+			*max_compute_units = sscreen->b.info.num_good_compute_units;
+		}
+		return sizeof(uint32_t);
+
+	case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+		if (ret) {
+			uint32_t *images_supported = ret;
+			*images_supported = 0;
+		}
+		return sizeof(uint32_t);
+	case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+		break; /* unused; ends up at the "unknown" report after the switch */
+	case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+		if (ret) {
+			uint32_t *subgroup_size = ret;
+			*subgroup_size = 64;
+		}
+		return sizeof(uint32_t);
+	case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+		if (ret) {
+			uint64_t *max_variable_threads_per_block = ret;
+			if (ir_type == PIPE_SHADER_IR_TGSI)
+				*max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+			else
+				*max_variable_threads_per_block = 0;
+		}
+		return sizeof(uint64_t);
+	}
+
+        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
+        return 0; /* nothing was written to ret */
+}
+
 static bool si_have_tgsi_compute(struct si_screen *sscreen)
 {
 	/* Old kernels disallowed some register writes for SI
 	 * that are used for indirect dispatches. */
 	return (sscreen->b.chip_class >= CIK ||
 		sscreen->b.info.drm_major == 3 ||
 		(sscreen->b.info.drm_major == 2 &&
 		 sscreen->b.info.drm_minor >= 45));
 }
 
@@ -823,20 +1111,69 @@ static const struct nir_shader_compiler_options nir_options = {
 
 static const void *
 si_get_compiler_options(struct pipe_screen *screen,
 			enum pipe_shader_ir ir,
 			enum pipe_shader_type shader)
 {
 	assert(ir == PIPE_SHADER_IR_NIR);
 	return &nir_options;
 }
 
+static uint64_t si_get_timestamp(struct pipe_screen *screen)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	uint64_t ticks = rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP);
+	uint64_t freq = rscreen->info.clock_crystal_freq; /* equals 1000000 * ticks / freq, split so the scaling cannot wrap 64 bits */
+	return (ticks / freq) * 1000000 + (ticks % freq) * 1000000 / freq;
+}
+
+static void si_query_memory_info(struct pipe_screen *screen,
+				 struct pipe_memory_info *info)
+{
+	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+	struct radeon_winsys *ws = rscreen->ws;
+	unsigned vram_usage, gtt_usage;
+
+	info->total_device_memory = rscreen->info.vram_size / 1024; /* size scaled down by 1024 */
+	info->total_staging_memory = rscreen->info.gart_size / 1024;
+
+	/* The real TTM memory usage is somewhat random, because:
+	 *
+	 * 1) TTM delays freeing memory, because it can only free it after
+	 *    fences expire.
+	 *
+	 * 2) The memory usage can be really low if big VRAM evictions are
+	 *    taking place, but the real usage is well above the size of VRAM.
+	 *
+	 * Instead, return statistics of this process.
+	 */
+	vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; /* NOTE(review): assumes the winsys reports bytes - confirm */
+	gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+	info->avail_device_memory = /* clamped to 0 rather than wrapping when usage exceeds the total */
+		vram_usage <= info->total_device_memory ?
+				info->total_device_memory - vram_usage : 0;
+	info->avail_staging_memory =
+		gtt_usage <= info->total_staging_memory ?
+				info->total_staging_memory - gtt_usage : 0;
+
+	info->device_memory_evicted =
+		ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+
+	if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) /* only this DRM range queries the eviction count directly */
+		info->nr_device_memory_evictions =
+			ws->query_value(ws, RADEON_NUM_EVICTIONS);
+	else
+		/* Just return the number of evicted 64KB pages. */
+		info->nr_device_memory_evictions = info->device_memory_evicted / 64;
+}
+
 static void si_destroy_screen(struct pipe_screen* pscreen)
 {
 	struct si_screen *sscreen = (struct si_screen *)pscreen;
 	struct si_shader_part *parts[] = {
 		sscreen->vs_prologs,
 		sscreen->tcs_epilogs,
 		sscreen->gs_prologs,
 		sscreen->ps_prologs,
 		sscreen->ps_epilogs
 	};
@@ -861,20 +1198,21 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 		while (parts[i]) {
 			struct si_shader_part *part = parts[i];
 
 			parts[i] = part->next;
 			si_radeon_shader_binary_clean(&part->binary);
 			FREE(part);
 		}
 	}
 	mtx_destroy(&sscreen->shader_parts_mutex);
 	si_destroy_shader_cache(sscreen);
+	disk_cache_destroy(sscreen->disk_shader_cache);
 	si_destroy_common_screen(&sscreen->b);
 }
 
 static bool si_init_gs_info(struct si_screen *sscreen)
 {
 	/* gs_table_depth is not used by GFX9 */
 	if (sscreen->b.chip_class >= GFX9)
 		return true;
 
 	switch (sscreen->b.family) {
@@ -977,34 +1315,71 @@ static void radeonsi_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
 
 	ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE);
 }
 
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 					   const struct pipe_screen_config *config)
 {
 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
+	char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
 	unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i;
+	struct utsname uname_data;
+	const char *chip_name;
 
 	if (!sscreen) {
 		return NULL;
 	}
 
+
+	ws->query_info(ws, &sscreen->b.info);
+	sscreen->b.ws = ws;
+
+	if ((chip_name = si_get_marketing_name(ws)))
+		snprintf(family_name, sizeof(family_name), "%s / ",
+			 si_get_family_name(sscreen) + 4);
+	else
+		chip_name = si_get_family_name(sscreen);
+
+	if (uname(&uname_data) == 0)
+		snprintf(kernel_version, sizeof(kernel_version),
+			 " / %s", uname_data.release);
+
+	if (HAVE_LLVM > 0) {
+		snprintf(llvm_string, sizeof(llvm_string),
+			 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+	}
+
+	snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string),
+		 "%s (%sDRM %i.%i.%i%s%s)",
+		 chip_name, family_name, sscreen->b.info.drm_major,
+		 sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel,
+		 kernel_version, llvm_string);
+
 	/* Set functions first. */
 	sscreen->b.b.context_create = si_pipe_create_context;
 	sscreen->b.b.destroy = si_destroy_screen;
+	sscreen->b.b.get_name = si_get_name;
+	sscreen->b.b.get_vendor = si_get_vendor;
+	sscreen->b.b.get_device_vendor = si_get_device_vendor;
+	sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache;
+	sscreen->b.b.get_compute_param = si_get_compute_param;
+	sscreen->b.b.get_paramf = si_get_paramf;
 	sscreen->b.b.get_param = si_get_param;
 	sscreen->b.b.get_shader_param = si_get_shader_param;
 	sscreen->b.b.get_compiler_options = si_get_compiler_options;
 	sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
 	sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
+	sscreen->b.b.get_timestamp = si_get_timestamp;
+	sscreen->b.b.query_memory_info = si_query_memory_info;
 	sscreen->b.b.resource_create = si_resource_create_common;
 
 	si_init_screen_fence_functions(sscreen);
 	si_init_screen_state_functions(sscreen);
 
 	/* Set these flags in debug_flags early, so that the shader cache takes
 	 * them into account.
 	 */
 	if (driQueryOptionb(config->options,
 			    "glsl_correct_derivatives_after_discard"))
@@ -1012,20 +1387,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	if (driQueryOptionb(config->options, "radeonsi_enable_sisched"))
 		sscreen->b.debug_flags |= DBG(SI_SCHED);
 
 	if (!si_common_screen_init(&sscreen->b, ws) ||
 	    !si_init_gs_info(sscreen) ||
 	    !si_init_shader_cache(sscreen)) {
 		FREE(sscreen);
 		return NULL;
 	}
 
+	si_disk_cache_create(sscreen);
+
 	/* Only enable as many threads as we have target machines, but at most
 	 * the number of CPUs - 1 if there is more than one.
 	 */
 	num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 	num_threads = MAX2(1, num_threads - 1);
 	num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
 	num_compiler_threads_lowprio =
 		MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
 
 	if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
@@ -1144,20 +1521,75 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 		sscreen->b.debug_flags |= DBG_ALL_SHADERS;
 
 	for (i = 0; i < num_compiler_threads; i++)
 		sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
 	for (i = 0; i < num_compiler_threads_lowprio; i++)
 		sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
 
 	/* Create the auxiliary context. This must be done last. */
 	sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0);
 
+	if (sscreen->b.debug_flags & DBG(INFO)) {
+		printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
+		       sscreen->b.info.pci_domain, sscreen->b.info.pci_bus,
+		       sscreen->b.info.pci_dev, sscreen->b.info.pci_func);
+		printf("pci_id = 0x%x\n", sscreen->b.info.pci_id);
+		printf("family = %i (%s)\n", sscreen->b.info.family,
+		       si_get_family_name(sscreen));
+		printf("chip_class = %i\n", sscreen->b.info.chip_class);
+		printf("pte_fragment_size = %u\n", sscreen->b.info.pte_fragment_size);
+		printf("gart_page_size = %u\n", sscreen->b.info.gart_page_size);
+		printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.gart_size, 1024*1024));
+		printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_size, 1024*1024));
+		printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_vis_size, 1024*1024));
+		printf("max_alloc_size = %i MB\n",
+		       (int)DIV_ROUND_UP(sscreen->b.info.max_alloc_size, 1024*1024));
+		printf("min_alloc_size = %u\n", sscreen->b.info.min_alloc_size);
+		printf("has_dedicated_vram = %u\n", sscreen->b.info.has_dedicated_vram);
+		printf("has_virtual_memory = %i\n", sscreen->b.info.has_virtual_memory);
+		printf("gfx_ib_pad_with_type2 = %i\n", sscreen->b.info.gfx_ib_pad_with_type2);
+		printf("has_hw_decode = %u\n", sscreen->b.info.has_hw_decode);
+		printf("num_sdma_rings = %i\n", sscreen->b.info.num_sdma_rings);
+		printf("num_compute_rings = %u\n", sscreen->b.info.num_compute_rings);
+		printf("uvd_fw_version = %u\n", sscreen->b.info.uvd_fw_version);
+		printf("vce_fw_version = %u\n", sscreen->b.info.vce_fw_version);
+		printf("me_fw_version = %i\n", sscreen->b.info.me_fw_version);
+		printf("me_fw_feature = %i\n", sscreen->b.info.me_fw_feature);
+		printf("pfp_fw_version = %i\n", sscreen->b.info.pfp_fw_version);
+		printf("pfp_fw_feature = %i\n", sscreen->b.info.pfp_fw_feature);
+		printf("ce_fw_version = %i\n", sscreen->b.info.ce_fw_version);
+		printf("ce_fw_feature = %i\n", sscreen->b.info.ce_fw_feature);
+		printf("vce_harvest_config = %i\n", sscreen->b.info.vce_harvest_config);
+		printf("clock_crystal_freq = %i\n", sscreen->b.info.clock_crystal_freq);
+		printf("tcc_cache_line_size = %u\n", sscreen->b.info.tcc_cache_line_size);
+		printf("drm = %i.%i.%i\n", sscreen->b.info.drm_major,
+		       sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel);
+		printf("has_userptr = %i\n", sscreen->b.info.has_userptr);
+		printf("has_syncobj = %u\n", sscreen->b.info.has_syncobj);
+		printf("has_sync_file = %u\n", sscreen->b.info.has_sync_file);
+
+		printf("r600_max_quad_pipes = %i\n", sscreen->b.info.r600_max_quad_pipes);
+		printf("max_shader_clock = %i\n", sscreen->b.info.max_shader_clock);
+		printf("num_good_compute_units = %i\n", sscreen->b.info.num_good_compute_units);
+		printf("max_se = %i\n", sscreen->b.info.max_se);
+		printf("max_sh_per_se = %i\n", sscreen->b.info.max_sh_per_se);
+
+		printf("r600_gb_backend_map = %i\n", sscreen->b.info.r600_gb_backend_map);
+		printf("r600_gb_backend_map_valid = %i\n", sscreen->b.info.r600_gb_backend_map_valid);
+		printf("r600_num_banks = %i\n", sscreen->b.info.r600_num_banks);
+		printf("num_render_backends = %i\n", sscreen->b.info.num_render_backends);
+		printf("num_tile_pipes = %i\n", sscreen->b.info.num_tile_pipes);
+		printf("pipe_interleave_bytes = %i\n", sscreen->b.info.pipe_interleave_bytes);
+		printf("enabled_rb_mask = 0x%x\n", sscreen->b.info.enabled_rb_mask);
+		printf("max_alignment = %u\n", (unsigned)sscreen->b.info.max_alignment);
+	}
+
 	if (sscreen->b.debug_flags & DBG(TEST_DMA))
 		si_test_dma(&sscreen->b);
 
 	if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) |
 				      DBG(TEST_VMFAULT_SDMA) |
 				      DBG(TEST_VMFAULT_SHADER)))
 		si_test_vmfault(sscreen);
 
 	return &sscreen->b.b;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 751441df1bc..a66f9da8658 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -99,20 +99,22 @@ struct si_screen {
 	bool				has_msaa_sample_loc_bug;
 	bool				has_ls_vgpr_init_bug;
 	bool				dpbb_allowed;
 	bool				dfsm_allowed;
 	bool				llvm_has_working_vgpr_indexing;
 
 	/* Whether shaders are monolithic (1-part) or separate (3-part). */
 	bool				use_monolithic_shaders;
 	bool				record_llvm_ir;
 
+	struct disk_cache		*disk_shader_cache;
+
 	mtx_t			shader_parts_mutex;
 	struct si_shader_part		*vs_prologs;
 	struct si_shader_part		*tcs_epilogs;
 	struct si_shader_part		*gs_prologs;
 	struct si_shader_part		*ps_prologs;
 	struct si_shader_part		*ps_epilogs;
 
 	/* Shader cache in memory.
 	 *
 	 * Design & limitations:
@@ -132,20 +134,22 @@ struct si_screen {
 	struct util_queue		shader_compiler_queue;
 	/* Use at most 3 normal compiler threads on quadcore and better.
 	 * Hyperthreaded CPUs report the number of threads, but we want
 	 * the number of cores. */
 	LLVMTargetMachineRef		tm[3]; /* used by the queue only */
 
 	struct util_queue		shader_compiler_queue_low_priority;
 	/* Use at most 2 low priority threads on quadcore and better.
 	 * We want to minimize the impact on multithreaded Mesa. */
 	LLVMTargetMachineRef		tm_low_priority[2]; /* at most 2 threads */
+
+	char				renderer_string[100];
 };
 
 struct si_blend_color {
 	struct r600_atom		atom;
 	struct pipe_blend_color		state;
 	bool				any_nonzeros;
 };
 
 struct si_sampler_view {
 	struct pipe_sampler_view	base;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3edc340f01f..e1c70aaea26 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -199,61 +199,61 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
 	hw_binary = si_get_shader_binary(shader);
 	if (!hw_binary)
 		return false;
 
 	if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
 				    hw_binary) == NULL) {
 		FREE(hw_binary);
 		return false;
 	}
 
-	if (sscreen->b.disk_shader_cache && insert_into_disk_cache) {
-		disk_cache_compute_key(sscreen->b.disk_shader_cache, tgsi_binary,
+	if (sscreen->disk_shader_cache && insert_into_disk_cache) {
+		disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary,
 				       *((uint32_t *)tgsi_binary), key);
-		disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary,
+		disk_cache_put(sscreen->disk_shader_cache, key, hw_binary,
 			       *((uint32_t *) hw_binary), NULL);
 	}
 
 	return true;
 }
 
 static bool si_shader_cache_load_shader(struct si_screen *sscreen,
 					void *tgsi_binary,
 				        struct si_shader *shader)
 {
 	struct hash_entry *entry =
 		_mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
 	if (!entry) {
-		if (sscreen->b.disk_shader_cache) {
+		if (sscreen->disk_shader_cache) {
 			unsigned char sha1[CACHE_KEY_SIZE];
 			size_t tg_size = *((uint32_t *) tgsi_binary);
 
-			disk_cache_compute_key(sscreen->b.disk_shader_cache,
+			disk_cache_compute_key(sscreen->disk_shader_cache,
 					       tgsi_binary, tg_size, sha1);
 
 			size_t binary_size;
 			uint8_t *buffer =
-				disk_cache_get(sscreen->b.disk_shader_cache,
+				disk_cache_get(sscreen->disk_shader_cache,
 					       sha1, &binary_size);
 			if (!buffer)
 				return false;
 
 			if (binary_size < sizeof(uint32_t) ||
 			    *((uint32_t*)buffer) != binary_size) {
 				 /* Something has gone wrong discard the item
 				  * from the cache and rebuild/link from
 				  * source.
 				  */
 				assert(!"Invalid radeonsi shader disk cache "
 				       "item!");
 
-				disk_cache_remove(sscreen->b.disk_shader_cache,
+				disk_cache_remove(sscreen->disk_shader_cache,
 						  sha1);
 				free(buffer);
 
 				return false;
 			}
 
 			if (!si_load_shader_binary(shader, buffer)) {
 				free(buffer);
 				return false;
 			}
-- 
2.11.0