[Mesa-dev] [PATCH 16/22] intel/tools/BatchbufferLogger: first implementation
kevin.rogovin at intel.com
Mon Sep 25 10:34:16 UTC 2017
From: Kevin Rogovin <kevin.rogovin at intel.com>
Initial implementation of BatchbufferLogger, a tool
to correlate batchbuffer contents with GL/GLES API
calls without changing what is sent to the GPU.
Signed-off-by: Kevin Rogovin <kevin.rogovin at intel.com>
---
src/intel/Makefile.tools.am | 27 +
src/intel/tools/i965_batchbuffer_logger.cpp | 4221 ++++++++++++++++++++
.../tools/i965_batchbuffer_logger_instructions.h | 131 +
3 files changed, 4379 insertions(+)
create mode 100644 src/intel/tools/i965_batchbuffer_logger.cpp
create mode 100644 src/intel/tools/i965_batchbuffer_logger_instructions.h
diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am
index 8071220..6e3e497 100644
--- a/src/intel/Makefile.tools.am
+++ b/src/intel/Makefile.tools.am
@@ -19,10 +19,15 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
+intellibdir = $(libdir)
+
noinst_PROGRAMS += \
tools/aubinator \
tools/aubinator_error_decode
+intellib_LTLIBRARIES = \
+ tools/libi965_batchbuffer_logger.la
+
tools_aubinator_SOURCES = \
tools/aubinator.c \
tools/disasm.c \
@@ -63,3 +68,25 @@ tools_aubinator_error_decode_CFLAGS = \
$(AM_CFLAGS) \
$(EXPAT_CFLAGS) \
$(ZLIB_CFLAGS)
+
+tools_libi965_batchbuffer_logger_la_SOURCES = \
+ tools/i965_batchbuffer_logger.cpp \
+ tools/gen_disasm.h \
+ tools/disasm.c \
+ tools/intel_aub.h
+
+tools_libi965_batchbuffer_logger_la_LIBADD = \
+ common/libintel_common.la \
+ compiler/libintel_compiler.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(top_builddir)/src/compiler/nir/libnir.la \
+ isl/libisl.la \
+ $(PTHREAD_LIBS) \
+ $(EXPAT_LIBS) \
+ $(ZLIB_LIBS)
+
+tools_libi965_batchbuffer_logger_la_CXXFLAGS = \
+ $(AM_CXXFLAGS) -std=c++11
+
+tools_libi965_batchbuffer_logger_la_LDFLAGS = \
+ -no-undefined -module -avoid-version -shared -shrext .so
diff --git a/src/intel/tools/i965_batchbuffer_logger.cpp b/src/intel/tools/i965_batchbuffer_logger.cpp
new file mode 100644
index 0000000..6300d18
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger.cpp
@@ -0,0 +1,4221 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <mutex>
+#include <map>
+#include <vector>
+#include <string>
+#include <list>
+#include <sstream>
+#include <fstream>
+#include <iomanip>
+#include <algorithm>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <typeinfo>
+#include <memory>
+#include <functional>
+
+#include <stdarg.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <pthread.h>
+
+#include "i965_batchbuffer_logger_instructions.h"
+#include "drm-uapi/i915_drm.h"
+#include "common/gen_decoder.h"
+#include "gen_disasm.h"
+#include "compiler/brw_inst.h"
+#include "util/mesa-sha1.h"
+
+#include "tools/i965_batchbuffer_logger_app.h"
+#include "tools/i965_batchbuffer_logger_output.h"
+#include "tools/i965_batchbuffer_logger.h"
+
+/* Basic overview of implementation:
+ * - BatchbufferLogger is a singleton to allow for calls into it
+ * without needing the object itself
+ *
+ * - Using the driver provided function pointer, it "knows" what
+ * is considered the active batchbuffer
+ * * NOTE: before being initialized by a driver, the function
+ * pointer specifying the active batchbuffer returns a value
+ * indicating that there is no active batchbuffer
+ *
+ * - BatchbufferLogger has a map keyed by file descriptor of
+ * GEMBufferTracker objects. A GEMBufferTracker has within it
+ * * a map keyed by GEM BO handle of GEMBufferObjects
+ * * a map keyed by GEM BO handle of BatchbufferLog
+ * * a dummy BatchbufferLog object
+ *
+ * - A BatchbufferLog object is essentially a log of what
+ * API calls are made when in a batchbuffer
+ *
+ * - A BatchbufferLog object is removed when any of the
+ * following occur:
+ * * The driver calls aborted_batchbuffer(); upon removal,
+ * the log is emitted.
+ * * at drmIoctl, the GEM BO is used as the command buffer in
+ * an execbuffer ioctl. Upon this ioctl, the associated
+ * BatchbufferLog is emitted.
+ *
+ * - A BatchbufferLog object is added to a GEMBufferTracker
+ * whenever a GEM BO handle not seen before is emitted by
+ * the function pointer provided by the driver that gives the
+ * active batchbuffer.
+ *
+ * - Whenever an entry is added to a BatchbufferLog object A,
+ *   any entries in the dummy BatchbufferLog are moved to A
+ *   in such a way that, when A is printed to file, the
+ *   entries from the dummy come first.
+ *
+ * - The following environment variables control what is written to
+ *   file:
+ * * I965_DECODE_LEVEL controls the level of batchbuffer decoding
+ * - no_decode do not decode batchbuffer at all
+ * - instruction_decode decode instruction name only
+ * - instruction_details_decode decode instruction contents
+ * * I965_PRINT_RELOC_LEVEL controls at what level to print reloc data
+ * - print_reloc_nothing do not print reloc data
+ * - print_reloc_gem_gpu_updates print reloc data GEM by GEM
+ * * I965_DECODE_VS : if 0, vertex shader binaries are written to file;
+ * otherwise their disassembly is emitted
+ * * I965_DECODE_HS : if 0, hull shader binaries are written to file;
+ * otherwise their disassembly is emitted
+ * * I965_DECODE_DS : if 0, domain shader binaries are written to file;
+ * otherwise their disassembly is emitted
+ *  * I965_DECODE_GS : if 0, geometry shader binaries are written to file;
+ * otherwise their disassembly is emitted
+ * * I965_DECODE_PS8 : if 0, 8-wide pixel shader binaries are written
+ * to file; otherwise their disassembly is emitted
+ * * I965_DECODE_PS16 : if 0, 16-wide pixel shader binaries are written
+ * to file; otherwise their disassembly is emitted
+ * * I965_DECODE_PS32 : if 0, 32-wide pixel shader binaries are written
+ * to file; otherwise their disassembly is emitted
+ * * I965_DECODE_CS : if 0, media/compute shader binaries are written to
+ * file; otherwise their disassembly is emitted
+ */
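+
+/* A minimal sketch (illustrative only, not part of this patch) of how
+ * an application-side shim might bracket an API call with the logger's
+ * app interface; the function-pointer member names used here are
+ * assumptions, while the signatures follow pre_call_fcn() and
+ * post_call_fcn() defined below:
+ *
+ *   struct i965_batchbuffer_logger_app *app; // acquired from the .so
+ *   app->pre_call(app, call_id,
+ *                 "glDrawArrays(GL_TRIANGLES, 0, 3)", "glDrawArrays");
+ *   glDrawArrays(GL_TRIANGLES, 0, 3);
+ *   app->post_call(app, call_id);
+ */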
+
+namespace {
+
+bool
+is_header_field(struct gen_group *group, struct gen_field *field)
+{
+ uint32_t bits;
+
+ if (field->start >= 32)
+ return false;
+
+ bits = (1U << (field->end - field->start + 1)) - 1;
+ bits <<= field->start;
+
+ return (group->opcode_mask & bits) != 0;
+}
+
+template<typename T>
+T
+read_from_environment(const char *env, T default_value)
+{
+ const char *tmp;
+ T return_value(default_value);
+
+ tmp = std::getenv(env);
+ if (tmp != nullptr) {
+    std::istringstream istr(tmp);
+ istr >> return_value;
+ }
+
+ return return_value;
+}
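+
+/* Usage sketch (illustrative only): the shader decode flags documented
+ * above can be read as integers with a fall-back default, e.g.
+ *
+ *   bool decode_vs = read_from_environment<int>("I965_DECODE_VS", 0) != 0;
+ *
+ * an unset variable yields the supplied default value.
+ */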
+
+class BatchbufferLoggerOutput;
+class APIStartCallMarker;
+class GEMBufferObject;
+class GPUCommandFieldValue;
+class GPUAddressQuery;
+class GPUCommand;
+class i965LatchState;
+class i965Registers;
+class i965HWContextData;
+class GPUState;
+class BatchRelocs;
+class ShaderFileList;
+class BatchbufferDecoder;
+class BatchbufferLog;
+class GEMBufferTracker;
+
+/* BatchbufferLoggerOutput wraps the file to which the log is
+ * written, providing nested blocks and name/value records.
+ */
+class BatchbufferLoggerOutput {
+public:
+ explicit
+ BatchbufferLoggerOutput(void):
+ m_file(nullptr),
+ m_current_block_level(0)
+ {}
+
+ ~BatchbufferLoggerOutput();
+
+ void
+ open(const char *filename);
+
+ void
+ close(void);
+
+ operator bool() const
+ {
+ return m_file != nullptr;
+ }
+
+ void
+ begin_block(const char *txt);
+
+ void
+ begin_block_value(const char *txt, const char *fmt, ...);
+
+ void
+ vbegin_block_value(const char *txt, const char *fmt, va_list va);
+
+ void
+ end_block(void);
+
+ void
+ clear_block_stack(unsigned int desired_depth = 0);
+
+ unsigned int
+ current_block_level(void)
+ {
+ return m_current_block_level;
+ }
+
+ const std::string&
+ filename(void) const
+ {
+ return m_filename;
+ }
+
+ void
+ print_value(const char *name, const char *fmt, ...);
+
+ void
+ vprint_value(const char *name, const char *fmt, va_list va);
+
+ template<typename F>
+ void
+ functor_print_value(const char *name, F f, bool pre_emit_eol);
+
+private:
+ BatchbufferLoggerOutput(const BatchbufferLoggerOutput &obj) = delete;
+
+ BatchbufferLoggerOutput&
+ operator=(const BatchbufferLoggerOutput &rhs) = delete;
+
+ void
+ write_name_value(enum i965_batchbuffer_logger_message_type_t tp,
+ const char *name, const char *fmt,
+ va_list va);
+
+ std::FILE *m_file;
+ std::string m_filename;
+ unsigned int m_current_block_level;
+};
+
+/* An APIStartCallMarker gives the details of an API call
+ * together with "where" in the batchbuffer the API
+ * call started.
+ */
+class APIStartCallMarker {
+public:
+ APIStartCallMarker(int call_id,
+ bool print_element,
+ const char *api_call,
+ const char *api_call_details,
+ uint32_t t):
+ m_call_id(call_id),
+ m_api_call(api_call),
+ m_api_call_details(api_call_details),
+ m_start_bb_location(t),
+ m_print_element(print_element)
+ {}
+
+  /* on emit, changes the value of m_print_element to false
+   * so that the element is not emitted again; returns
+   * true if m_print_element was true at call entry
+ */
+ bool
+ emit(uint32_t next_entry_start_bb_location,
+ BatchbufferLoggerOutput &dst, unsigned int top_level);
+
+ uint32_t
+ start_bb_location(void) const
+ {
+ return m_start_bb_location;
+ }
+
+ int
+ call_id(void) const
+ {
+ return m_call_id;
+ }
+
+ void
+ add_ioctl_log_entry(const std::string &entry)
+ {
+ m_ioctl_log.push_back(entry);
+ }
+
+ static
+ void
+ print_ioctl_log(const std::list<std::string> &ioctl_log,
+ BatchbufferLoggerOutput &dst);
+
+private:
+ /* the ID number for the call */
+ int m_call_id;
+
+ /* name of the API call */
+ std::string m_api_call;
+
+ /* details of the API call */
+ std::string m_api_call_details;
+
+ /* location in the batchbuffer at the time
+ * the marker was made.
+ */
+ uint32_t m_start_bb_location;
+
+ /* true if element is within the print window */
+ bool m_print_element;
+
+ /* additional log-messages that come from ioctl's */
+ std::list<std::string> m_ioctl_log;
+};
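+
+/* Illustrative sketch (not part of this patch): a marker created as
+ *
+ *   APIStartCallMarker marker(call_id, true, "glDrawArrays",
+ *                             "mode = GL_TRIANGLES, first = 0, count = 3",
+ *                             bb_location);
+ *
+ * records where in the batchbuffer the call began; emit() compares that
+ * location against the start of the -next- marker to tag calls that
+ * actually generated GPU commands.
+ */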
+
+class GEMBufferObject {
+public:
+ /* Value passed is the value AFTER the ioctl
+   * DRM_IOCTL_I915_GEM_CREATE; the kernel passes
+   * back the modified struct
+ */
+ explicit
+ GEMBufferObject(int fd, const struct drm_i915_gem_create &pdata);
+
+ /* Value passed is the value AFTER the ioctl
+   * DRM_IOCTL_I915_GEM_USERPTR; the kernel passes
+   * back the modified struct
+ */
+ explicit
+ GEMBufferObject(int fd, const struct drm_i915_gem_userptr &pdata);
+
+ /* To be called -BEFORE- the ioctl DRM_IOCTL_GEM_CLOSE of
+ * the GEM
+ */
+ ~GEMBufferObject();
+
+ /* Handle to the GEM BO */
+ uint32_t
+ handle(void) const
+ {
+ return m_handle;
+ }
+
+ /* size of GEM BO in bytes */
+ uint64_t
+ size(void) const
+ {
+ return m_size;
+ }
+
+ /* If underlying GEM BO was created with DRM_IOCTL_I915_GEM_USERPTR,
+ * then returns the CPU address of the underlying memory
+ */
+ const void*
+ user_ptr(void) const
+ {
+ return m_user_ptr;
+ }
+
+ /* GPU address of GEM BO, note that until
+ * update_gpu_address() is called the value
+ * is 0, which is guaranteed to be incorrect.
+ */
+ uint64_t
+ gpu_address_begin(void) const
+ {
+ return m_gpu_address;
+ }
+
+ /* Gives the GPU address for the very end of the BO */
+ uint64_t
+ gpu_address_end(void) const
+ {
+ return m_size + m_gpu_address;
+ }
+
+ void
+ update_gpu_address(uint64_t new_gpu_address)
+ {
+ m_gpu_address = new_gpu_address;
+ }
+
+ template<typename T = void>
+ const T*
+ cpu_mapped(void) const
+ {
+ return static_cast<const T*>(m_mapped);
+ }
+
+ int
+ pread_buffer(void *dst, uint64_t start, uint64_t sz) const;
+
+private:
+ /* File descriptor of ioctl to make GEM BO */
+ int m_fd;
+
+ uint32_t m_handle;
+ uint64_t m_size;
+ const uint8_t *m_user_ptr;
+
+ /* The buffer mapped; there is a danger that mapping
+ * the buffer without sufficient cache flushing
+ * will give incorrect data; on the other hand,
+ * the gen_decoder interface wants raw pointers
+ * from which to read. Let's hope that cache
+ * flushing is not needed for reading the contents.
+ */
+ void *m_mapped;
+
+ /* the location in the GPU address space of the GEM
+ * object, this is updated by the kernel in the
+ * value drm_i915_gem_exec_object2::offset
+ */
+ uint64_t m_gpu_address;
+};
+
+/* class to extract a value from a gen_field_iterator */
+class GPUCommandFieldValue {
+public:
+ explicit
+ GPUCommandFieldValue(const gen_field_iterator &iter);
+
+ template<typename T>
+ T
+ value(void) const;
+
+ /**
+   * Returns the gen type as indicated by the gen_field_iterator
+   * used at construction; the value is from the unnamed
+   * enumeration in gen_field::type::kind.
+ */
+ unsigned int
+ type(void) const
+ {
+ return m_gen_type;
+ }
+
+private:
+ /* enum values from the unnamed enum in gen_field::type::kind */
+ unsigned int m_gen_type;
+
+ union {
+ /* for types GEN_TYPE_FLOAT, GEN_TYPE_UFIXED and GEN_TYPE_SFIXED */
+ float f;
+
+ /* for type GEN_TYPE_INT */
+ int64_t i;
+
+ /* for types GEN_TYPE_UNKNOWN, GEN_TYPE_UINT,
+ * GEN_TYPE_ADDRESS, GEN_TYPE_OFFSET, GEN_TYPE_ENUM
+ */
+ uint64_t u;
+
+ /* for GEN_TYPE_BOOL
+ */
+ bool b;
+ } m_value;
+
+ /* field extraction routines and helpers taken from
+ * gen_decoder.c
+ */
+ static
+ uint64_t
+ mask(int start, int end)
+ {
+ uint64_t v;
+ v = ~0ULL >> (63 - end + start);
+ return v << start;
+ }
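+
+  /* e.g. mask(4, 7): 63 - 7 + 4 == 60, so ~0ULL >> 60 == 0xF (four
+   * set bits) and 0xF << 4 == 0xF0, i.e. bits 4..7 are set.
+   */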
+
+ static
+ void
+ get_start_end_pos(int *start, int *end)
+ {
+ if (*end - *start > 32) {
+ int len = *end - *start;
+ *start = *start % 32;
+ *end = *start + len;
+ } else {
+ *start = *start % 32;
+ *end = *end % 32;
+ }
+ }
+
+ template<typename T>
+ static
+ T
+ field(uint64_t value, int start, int end)
+ {
+ uint64_t v;
+ get_start_end_pos(&start, &end);
+ v = (value & mask(start, end)) >> (start);
+ return static_cast<T>(v);
+ }
+
+ static
+ uint64_t
+ field_address(uint64_t value, int start, int end)
+ {
+ get_start_end_pos(&start, &end);
+ return (value & mask(start, end));
+ }
+};
+
+
+/* Result of querying which GEMBufferObject, and what
+ * offset into that GEMBufferObject, corresponds to a
+ * GPU address
+ */
+class GPUAddressQuery {
+public:
+ GPUAddressQuery(void):
+ m_gem_bo(nullptr),
+ m_offset_into_gem_bo(-1)
+ {}
+
+ GEMBufferObject *m_gem_bo;
+ uint64_t m_offset_into_gem_bo;
+};
+
+/* A GPUCommand is a location within a GEM BO
+ * specifying where a GPU command is.
+ */
+class GPUCommand {
+public:
+ /* when saving GPUCommand's that set GPU state, we key
+ * the value by the op-code of the GPU command.
+ */
+ typedef uint32_t state_key;
+
+ /* what we do with the GPUCommand on absorbing it:
+ * - save the value as state and do not print it immediately
+ * - print it immediately and show current GPU state
+   * - print it immediately and do not show current GPU state
+ */
+ enum gpu_command_type_t {
+ gpu_command_save_value_as_state_hw_context,
+ gpu_command_save_value_as_state_not_hw_context,
+ gpu_command_set_register,
+ gpu_command_show_value_with_gpu_state,
+ gpu_command_show_value_without_gpu_state,
+ };
+
+  /* only defined for gpu_command_type_t values
+   * gpu_command_save_value_as_state_* and gpu_command_show_value_with_gpu_state
+ */
+ enum gpu_pipeline_type_t {
+ gpu_pipeline_compute,
+ gpu_pipeline_gfx,
+ };
+
+ GPUCommand(void);
+
+  /* if grp is nullptr, then use spec and the contents at
+   * the location to figure out what the GPU command is.
+ */
+ GPUCommand(const GEMBufferObject *q, uint64_t dword_offset,
+ struct gen_spec *spec, struct gen_group *grp = nullptr);
+
+ GPUCommand(const GPUAddressQuery &q, struct gen_spec *spec,
+ struct gen_group *grp = nullptr);
+
+ const uint32_t*
+ contents_ptr(void) const
+ {
+ return m_contents;
+ }
+
+ uint32_t
+ operator[](unsigned int I) const
+ {
+ assert(I < contents_size());
+ return m_contents[I];
+ }
+
+ uint32_t
+ content(unsigned int I) const
+ {
+ assert(I < contents_size());
+ return m_contents[I];
+ }
+
+ unsigned int
+ contents_size(void) const
+ {
+ return m_dword_length;
+ }
+
+ struct gen_group*
+ inst(void) const
+ {
+ return m_inst;
+ }
+
+ const GEMBufferObject*
+ gem_bo(void) const
+ {
+ return m_gem_bo;
+ }
+
+ uint64_t
+ offset(void) const
+ {
+ return m_gem_bo_offset;
+ }
+
+ uint64_t
+ dword_offset(void) const
+ {
+ return offset() / sizeof(uint32_t);
+ }
+
+ enum gpu_command_type_t
+ gpu_command_type(void) const
+ {
+ return m_command_type;
+ }
+
+ enum gpu_pipeline_type_t
+ gpu_pipeline_type(void) const
+ {
+ return m_pipeline_type;
+ }
+
+ /* read a GPU address from a location within the GPUCommand */
+ uint64_t
+ get_gpu_address(const BatchRelocs &relocs,
+ uint64_t dword_offset_from_cmd_start,
+ bool ignore_lower_12_bits = true) const;
+
+ /* Sets up the GPUCommand to read data from an internal storage
+ * instead of from the GEM BO.
+ */
+ void
+ archive_data(const BatchRelocs &relocs);
+
+ /* Returns true if and only if the GPUCommand is reading data
+ * from internal storage instead of from the GEM BO.
+ */
+ bool
+ is_archived(void) const
+ {
+ return m_archived_data.size() == m_dword_length;
+ }
+
+ /* Extract the value of a field from a GPUCommand, saving
+ * the value in dst. Returns true on success and false
+ * on failure.
+ */
+ template<typename T>
+ bool
+ extract_field_value(const char *pname, T *dst) const;
+
+private:
+ static
+ enum gpu_command_type_t
+ get_gpu_command_type(struct gen_group *inst);
+
+ static
+ enum gpu_pipeline_type_t
+ get_gpu_pipeline_type(struct gen_group *inst);
+
+ void
+ complete_init(uint32_t dword_offset, struct gen_spec *spec,
+ struct gen_group *grp);
+
+ const GEMBufferObject *m_gem_bo;
+ uint64_t m_gem_bo_offset;
+ struct gen_group *m_inst;
+ const uint32_t *m_contents;
+ unsigned int m_dword_length;
+ enum gpu_command_type_t m_command_type;
+ enum gpu_pipeline_type_t m_pipeline_type;
+ std::vector<uint32_t> m_archived_data;
+};
+
+/* A significant amount of state on i965 depends deeply on other
+ * portions of state for decoding. The biggest example being
+ * the values in STATE_BASE_ADDRESS.
+ */
+class i965LatchState {
+public:
+ class per_stage_values {
+ public:
+ per_stage_values(void):
+ m_binding_table_count(-1),
+ m_sampler_count(-1)
+ {}
+
+ int m_binding_table_count;
+ int m_sampler_count;
+ };
+
+ i965LatchState(void);
+
+ void
+ update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q);
+
+ /* Tracking STATE_BASE_ADDRESS */
+ uint64_t m_general_state_base_address;
+ uint64_t m_surface_state_base_address;
+ uint64_t m_dynamic_state_base_address;
+ uint64_t m_instruction_base_address;
+
+ /* value derived from 3D_STATE_XS */
+ int m_VIEWPORT_count;
+ per_stage_values m_VS, m_HS, m_DS, m_GS, m_PS, m_CS;
+
+private:
+ void
+ update_stage_values(BatchbufferDecoder *decoder,
+ BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q, per_stage_values *dst);
+
+ static
+ void
+ update_state_base_address_helper(const GPUCommand &q,
+ const char *value_enabled_name,
+ uint64_t *dst, const char *value_name);
+
+ void
+ update_state_base_address(BatchbufferDecoder *decoder,
+ BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q);
+};
+
+/* A simple container to track the value of registers.
+ */
+class i965Registers {
+public:
+ i965Registers(void)
+ {}
+
+ void
+ update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q);
+
+ void
+ decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile);
+
+private:
+  /* register values are part of state; the key
+   * of the map is the register offset and the value
+   * is the value of the register.
+ */
+ std::map<uint32_t, uint32_t> m_register_values;
+};
+
+/* The execbuffer2 ioctls, (DRM_IOCTL_I915_GEM_EXECBUFFER2
+ * and DRM_IOCTL_I915_GEM_EXECBUFFER2_WR) can pass a HW
+ * context (via a uint32_t). When a driver uses a HW context,
+ * it can avoid sending large amounts of state commands to
+ * restore state. However, when we decode a batchbuffer,
+ * we need to record HW state that impacts decoding
+ * batchbuffers. The Bspec page to examine for what is
+ * saved and restored in a HW context is at
+ * gfxspecs.intel.com/Predator/Home/Index/20855
+ */
+class i965HWContextData {
+public:
+ explicit
+ i965HWContextData(uint32_t ctx_id);
+ ~i965HWContextData();
+
+ void
+ decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile);
+
+ void
+ update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &Q);
+
+ /* Batchbuffer decoding needs to examine and change
+ * the values in i965LatchState when decoding some
+ * elements of state.
+ */
+ i965LatchState m_latch_state;
+
+private:
+ uint32_t m_ctx_id;
+ std::map<GPUCommand::state_key, GPUCommand> m_state;
+ i965Registers m_registers;
+};
+
+class GPUState {
+public:
+ explicit
+ GPUState(i965HWContextData *ctx):
+ m_ctx_data(ctx)
+ {}
+
+ void
+ update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &Q);
+
+ void
+ decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile);
+
+ i965HWContextData&
+ ctx(void) const
+ {
+ return *m_ctx_data;
+ }
+
+private:
+  /* holder for state of the HW context */
+ i965HWContextData *m_ctx_data;
+
+ /* state that is not saved in the HW context */
+ std::map<GPUCommand::state_key, GPUCommand> m_state;
+ i965Registers m_registers;
+};
+
+/* A BatchRelocs tracks the relocation data reported back
+ * from the kernel after an ioctl
+ */
+class BatchRelocs {
+public:
+ explicit
+ BatchRelocs(gen_spec *spec):
+ m_32bit_gpu_addresses(spec && gen_spec_get_gen(spec) < gen_make_gen(8, 0))
+ {
+ }
+
+ void
+ add_entry(const GEMBufferObject *gem,
+ uint64_t offset_into_gem,
+ uint64_t gpu_address)
+ {
+ m_relocs[gem][offset_into_gem] = gpu_address;
+ }
+
+  /* Write into dst any relocations found for the given GEM,
+   * with gem_bo_offset giving the offset in -bytes- from the
+   * start of the GEM at which dst begins.
+ */
+ void
+ place_relocation_values_into_buffer(const GEMBufferObject *gem, uint64_t gem_bo_offset,
+ std::vector<uint32_t> *dst) const;
+
+ /* Decode/get a GPU address from a location in a GEMBufferObject
+ * - dword_offset in units of uint32_t's
+   * - if ignore_lower_12_bits is true, then the low 12 bits of the
+ * passed gpu-address are ignored and the fetch is as if
+ * they are zero
+ */
+ uint64_t
+ get_gpu_address(const GEMBufferObject *q, uint64_t dword_offset,
+ const uint32_t *p, bool ignore_lower_12_bits = true) const;
+
+
+ void
+ emit_reloc_data(BatchbufferLoggerOutput &pfile);
+
+private:
+ bool m_32bit_gpu_addresses;
+
+ /* m_relocs[p] gives how to potentially reinterpret GPU addresses
+   * when reading from buffer object p. That map is keyed
+   * by offsets into p, with values giving the correct
+   * address at each offset.
+ */
+ typedef std::map<uint64_t, uint64_t> reloc_map_of_gem_bo;
+ typedef std::map<const GEMBufferObject*, reloc_map_of_gem_bo> reloc_map;
+ reloc_map m_relocs;
+};
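+
+/* Illustrative sketch (not part of this patch): after an execbuffer2
+ * ioctl returns, each relocation the kernel resolved can be recorded as
+ *
+ *   relocs.add_entry(gem, reloc_offset_in_bytes, resolved_gpu_address);
+ *
+ * so that a later get_gpu_address() on that dword returns the address
+ * the kernel actually assigned rather than a stale value read from
+ * memory.
+ */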
+
+/* A ShaderFileList acts as a map from shaders to filenames.
+ * A hash value is used as the key of the map. If the contents
+ * of a shader are not found, then a new entry is made.
+ */
+class ShaderFileList
+{
+public:
+ ShaderFileList(void):
+ m_count(0)
+ {}
+
+ const char*
+ filename(const std::string &fileprefix, const void *shader,
+ int pciid, struct gen_disasm *gen_disasm);
+
+ void
+ clear(void)
+ {
+ m_count = 0;
+ m_files.clear();
+ }
+
+private:
+ typedef std::array<unsigned char, 20> sha1_value;
+
+ ShaderFileList(const ShaderFileList&) = delete;
+
+ ShaderFileList&
+ operator=(const ShaderFileList &rhs) = delete;
+
+ int m_count;
+ std::map<sha1_value, std::string> m_files;
+};
+
+/* A BatchbufferDecoder assists in decoding the contents
+ * of a batchbuffer, using the machinery in a GEMBufferTracker
+ * to correctly read the contents of indirect state.
+ */
+class BatchbufferDecoder {
+public:
+ enum decode_level_t {
+ no_decode,
+ instruction_decode,
+ instruction_details_decode
+ };
+
+ enum print_reloc_level_t {
+ print_reloc_nothing,
+ print_reloc_gem_gpu_updates,
+ };
+
+  /* enumeration giving which bit of the shader decode flags controls each stage */
+ enum shader_decode_entry_t {
+ shader_decode_vs,
+ shader_decode_hs,
+ shader_decode_ds,
+ shader_decode_gs,
+ shader_decode_ps_8,
+ shader_decode_ps_16,
+ shader_decode_ps_32,
+ shader_decode_media_compute,
+
+ shader_decode_entry_count,
+ };
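+
+  /* e.g. (sketch): a decode-flags value requesting disassembly of only
+   * the vertex shader and the 16-wide pixel shader would be
+   *   (1u << shader_decode_vs) | (1u << shader_decode_ps_16)
+   */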
+
+ BatchbufferDecoder(enum decode_level_t decode_level,
+ enum print_reloc_level_t print_reloc_level,
+ uint32_t shader_decode_flags,
+ struct gen_spec *spec,
+ struct gen_disasm *dis,
+ int pciid,
+ GEMBufferTracker *tracker,
+ ShaderFileList *shader_filelist,
+ struct drm_i915_gem_execbuffer2 *execbuffer2);
+
+ void
+ absorb_batchbuffer_contents(bool printing_enabled,
+ BatchbufferLoggerOutput &pfile,
+ unsigned int start_dword, unsigned int end_dword);
+
+ void
+ decode_gpu_command(BatchbufferLoggerOutput &pfile, const GPUCommand &q);
+
+ const GEMBufferTracker&
+ tracker(void) const
+ {
+ return *m_tracker;
+ }
+
+ const GEMBufferObject*
+ batchbuffer(void)
+ {
+ return m_batchbuffer;
+ }
+
+ const BatchRelocs&
+ relocs(void) const
+ {
+ return m_relocs;
+ }
+
+ BatchbufferLog*
+ batchbuffer_log(void)
+ {
+ return m_batchbuffer_log;
+ }
+
+ struct gen_spec*
+ spec(void) const
+ {
+ return m_spec;
+ }
+
+ void
+ emit_log(BatchbufferLoggerOutput &file);
+
+private:
+ class DetailedDecoder
+ {
+ public:
+ static
+ void
+ decode(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ private:
+ typedef void (BatchbufferDecoder::*fcn)(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ DetailedDecoder(void);
+
+ DetailedDecoder(const DetailedDecoder &obj) = delete;
+
+ DetailedDecoder&
+ operator=(const DetailedDecoder &rhs) = delete;
+
+ /* keyed by op-code */
+ std::map<uint32_t, fcn> m_elements;
+ };
+
+ void
+ build_driver_values(void);
+
+ void
+ decode_gen_group(BatchbufferLoggerOutput &pfile,
+ const GEMBufferObject *q, uint64_t offset,
+ const uint32_t *p, struct gen_group *inst);
+
+ void
+ decode_gpu_execute_command(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q);
+
+ void
+ process_gpu_command(bool printing_enabled,
+ BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q);
+
+ void
+ decode_pointer_helper(BatchbufferLoggerOutput &pfile,
+ struct gen_group *g, uint64_t gpu_address);
+
+ void
+ decode_pointer_helper(BatchbufferLoggerOutput &pfile,
+ const char *instruction_name,
+ uint64_t gpu_address);
+
+ void
+ decode_shader(BatchbufferLoggerOutput &pfile,
+ enum shader_decode_entry_t tp, uint64_t gpu_address);
+
+ void
+ decode_3dstate_binding_table_pointers(BatchbufferLoggerOutput &pfile,
+ const std::string &label, uint32_t offset,
+ int cnt);
+
+ void
+ decode_3dstate_sampler_state_pointers_helper(BatchbufferLoggerOutput &pfile,
+ uint32_t offset, int cnt);
+
+ void
+ decode_media_interface_descriptor_load(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_xs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_ps(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_constant(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_binding_table_pointers_vs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_binding_table_pointers_ds(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_binding_table_pointers_hs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_binding_table_pointers_gs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_binding_table_pointers_ps(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_vs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_gs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_hs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_ds(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_ps(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_sampler_state_pointers_gen6(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_viewport_state_pointers_cc(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_viewport_state_pointers_sf_clip(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_blend_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_cc_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ void
+ decode_3dstate_scissor_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data);
+
+ enum decode_level_t m_decode_level;
+ enum print_reloc_level_t m_print_reloc_level;
+ uint32_t m_shader_decode_flags;
+ struct gen_spec *m_spec;
+ struct gen_disasm *m_gen_disasm;
+ int m_pci_id;
+ GEMBufferTracker *m_tracker;
+ ShaderFileList *m_shader_filelist;
+ const GEMBufferObject *m_batchbuffer;
+ BatchbufferLog *m_batchbuffer_log;
+ std::vector<GEMBufferObject*> m_buffers;
+ bool m_reloc_handles_are_indices;
+ GPUState m_gpu_state;
+ BatchRelocs m_relocs;
+ struct drm_i915_gem_execbuffer2 *m_execbuffer2;
+};
+
+/* The type to hold the log associated to a single batchbuffer
+ */
+class BatchbufferLog {
+public:
+ BatchbufferLog(int fd, const void *driver_data, uint32_t h)
+ {
+ m_src.gem_bo = h;
+ m_src.fd = fd;
+ m_src.driver_data = driver_data;
+ }
+
+ const struct i965_logged_batchbuffer*
+ src(void) const
+ {
+ return &m_src;
+ }
+
+  /* returns true if the last entry in the list was printed to the file */
+  bool
+ emit_log(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &file,
+ uint32_t batchbuffer_len);
+
+ void
+ add_call_marker(bool should_print,
+ BatchbufferLog &dummy, unsigned int call_id,
+ const char *fcn_name, const char *call_detailed,
+ uint32_t bb_location)
+ {
+ if (this != &dummy) {
+ m_prints_from_dummy.splice(m_prints_from_dummy.end(),
+ dummy.m_prints);
+ }
+ APIStartCallMarker ap(call_id, should_print, fcn_name, call_detailed,
+ bb_location);
+ m_prints.push_back(ap);
+ }
+
+ void
+ clear(void)
+ {
+ m_prints.clear();
+ m_prints_from_dummy.clear();
+ }
+
+ bool
+ empty(void) const
+ {
+ return m_prints.empty() && m_prints_from_dummy.empty();
+ }
+
+ void
+ add_ioctl_log_entry(const std::string &entry);
+
+private:
+ void
+ handle_batchbuffer_contents(bool print_enabled,
+ BatchbufferDecoder *decoder, BatchbufferLoggerOutput &dst,
+ uint32_t start, uint32_t end);
+
+ friend class GEMBufferTracker;
+
+ /* src parameters of the BatchbufferLog object */
+ struct i965_logged_batchbuffer m_src;
+
+ /* API markers of the batchbuffer */
+ std::list<APIStartCallMarker> m_prints;
+
+  /* Markers emitted when there is no active batchbuffer
+   * land in BatchbufferLogger::m_dummy. The first time
+   * BatchbufferLogger has a valid batchbuffer, the markers
+   * of m_dummy are spliced onto that batchbuffer's log here.
+ */
+ std::list<APIStartCallMarker> m_prints_from_dummy;
+
+  /* ioctl log entries added when there is no
+   * APIStartCallMarker to which to attach them.
+ */
+ std::list<std::string> m_orphan_ioctl_log_entries;
+};
+
+class GEMBufferTracker {
+public:
+ explicit
+ GEMBufferTracker(int fd);
+
+ ~GEMBufferTracker();
+
+ /* Add a GEM BO, to be called after the ioctl
+ * DRM_IOCTL_I915_GEM_CREATE returns with the
+ * kernel modified drm_i915_gem_create value
+ */
+ void
+ add_gem_bo(const struct drm_i915_gem_create &pdata);
+
+ /* Add a GEM BO, to be called after the ioctl
+ * DRM_IOCTL_I915_GEM_USERPTR returns with the
+ * kernel modified drm_i915_gem_userptr value
+ */
+ void
+ add_gem_bo(const struct drm_i915_gem_userptr &pdata);
+
+ /* remove a GEM BO from tracking */
+ void
+ remove_gem_bo(uint32_t h);
+
+ /* Fetch a GEMBufferObject given a GEM handle */
+ GEMBufferObject*
+ fetch_gem_bo(uint32_t h) const;
+
+ /* Add a new HW GEM context for tracking */
+ void
+ add_hw_context(const struct drm_i915_gem_context_create &create);
+
+ /* remove a HW GEM context for tracking */
+ void
+ remove_hw_context(const struct drm_i915_gem_context_destroy &destroy);
+
+ /* fetch a GEM HW context from a handle */
+ i965HWContextData*
+ fetch_hw_context(uint32_t h);
+
+ /* to be called just after the ioctl
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2 or
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR
+   * is issued, passing the GEM BO list
+   * modified by the kernel; returns which
+   * GEMBufferObject had the GEM handle
+   * and whether the GPU address changed
+ */
+ std::pair<bool, GEMBufferObject*>
+ update_gem_bo_gpu_address(const struct drm_i915_gem_exec_object2 *p);
+
+  /* Return which GEM BO, and the offset into
+   * that GEM BO, corresponds to a given GPU address.
+ */
+ GPUAddressQuery
+ get_memory_at_gpu_address(uint64_t) const;
+
+ /* Use kernel interface pread to read contents */
+ int
+ pread_buffer(void *dst, uint64_t gpu_address, uint64_t size) const;
+
+  /* Get the CPU mapping of a GEM BO from a GPU address */
+ template<typename T>
+ const T*
+ cpu_mapped(uint64_t gpu_address, GPUAddressQuery *q = nullptr);
+
+ /* Fetch (or create) a BatchbufferLog given a
+ * GEM handle and an opaque pointer provided by the
+ * driver for a batchbuffer.
+ */
+ BatchbufferLog*
+ fetch_or_create(const void *opaque_bb, uint32_t gem_handle);
+
+ /* Fetch a BatchbufferLog given a GEM handle, if
+ * no BatchbufferLog exists, then return nullptr
+ */
+ BatchbufferLog*
+ fetch(uint32_t gem_handle);
+
+ /* remove a BatchbufferLog from tracking */
+ void
+ remove_batchbuffer_log(const BatchbufferLog *q);
+
+  /* Emit to file any BatchbufferLog objects that remain,
+   * and also remove them.
+ */
+ void
+ emit_unemitted_log(BatchbufferLoggerOutput &dst);
+
+private:
+ int m_fd;
+
+ /* GEM BO's keyed by DRM handle */
+ std::map<uint32_t, GEMBufferObject*> m_gem_bos_by_handle;
+
+ /* GEM BO's keyed by the GPU address of the end of the GEM BO*/
+ std::map<uint64_t, GEMBufferObject*> m_gem_bos_by_gpu_address_end;
+
+ /* HW contexts keyed by DRM handle */
+ std::map<uint32_t, i965HWContextData> m_hw_contexts;
+
+ /* dummy HW context for execbuffer calls without hw
+   * context; the value is reset each time it is fetched
+ */
+ i965HWContextData m_dummy_hw_ctx;
+
+ /* backing storage for the logs, keyed by
+ * batchbuffer DRM handle
+ */
+ std::map<uint32_t, BatchbufferLog> m_logs;
+};
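+
+/* Illustrative sketch (not part of this patch): resolving a GPU address
+ * seen in a batchbuffer back to CPU-visible memory via the tracker:
+ *
+ *   GPUAddressQuery q = tracker.get_memory_at_gpu_address(addr);
+ *   if (q.m_gem_bo) {
+ *     const uint32_t *p = q.m_gem_bo->cpu_mapped<uint32_t>()
+ *                         + q.m_offset_into_gem_bo / sizeof(uint32_t);
+ *   }
+ */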
+
+class BatchbufferLogger:
+ public i965_batchbuffer_logger,
+ public i965_batchbuffer_logger_app {
+public:
+ static
+ BatchbufferLogger*
+ acquire(void);
+
+ static
+ void
+ release(void);
+
+ static
+ int
+ local_drm_ioctl(int fd, unsigned long request, void *argp);
+
+ void
+ set_driver_funcs(int pci_id,
+ i965_logged_batchbuffer_state f1,
+ i965_active_batchbuffer f2);
+
+ void
+ pre_process_ioctl(int fd, unsigned long request, void *argp);
+
+ void
+ post_process_ioctl(int ioctl_return_code, int fd, unsigned long request, void *argp);
+
+private:
+ BatchbufferLogger(void);
+ ~BatchbufferLogger();
+
+ GEMBufferTracker*
+ gem_buffer_tracker(int fd);
+
+ /* Returns nullptr if fd is -1 or if the
+ * GEMBufferTracker associated to the fd
+ * does not have a BatchbufferLog of
+ * the given gem_bo
+ */
+ BatchbufferLog*
+ fetch_batchbuffer_log(int fd, uint32_t gem_bo)
+ {
+ /* We do NOT want to create a BatchbufferLog
+ * object, thus we use the call that only fetches
+ * and does not create.
+ */
+ return (fd != -1) ?
+ gem_buffer_tracker(fd)->fetch(gem_bo) :
+ nullptr;
+ }
+
+ /* if fd is -1, then returns the dummy BatchbufferLog,
+   * otherwise fetches or creates a BatchbufferLog from
+ * the fields of the passed batchbuffer
+ */
+ BatchbufferLog*
+ fetch_or_create_batchbuffer_log(const struct i965_logged_batchbuffer *batchbuffer)
+ {
+ int fd;
+ fd = (batchbuffer != nullptr) ? batchbuffer->fd : -1;
+ return (fd != -1) ?
+ gem_buffer_tracker(fd)->fetch_or_create(batchbuffer->driver_data, batchbuffer->gem_bo) :
+ &m_dummy;
+ }
+
+ /* Calls m_active_batchbuffer to get the value of
+ * the active batchbuffer and uses that.
+ */
+ BatchbufferLog*
+ fetch_or_create_batchbuffer_log(void)
+ {
+ struct i965_logged_batchbuffer bb;
+ m_active_batchbuffer(&bb);
+ return fetch_or_create_batchbuffer_log(&bb);
+ }
+
+ static
+ void
+ aborted_batchbuffer_fcn(struct i965_batchbuffer_logger*, int fd, uint32_t gem_bo);
+
+ static
+ void
+ release_driver_fcn(struct i965_batchbuffer_logger *pthis);
+
+ static
+ void
+ pre_call_fcn(struct i965_batchbuffer_logger_app *pthis,
+ unsigned int call_id,
+ const char *call_detailed,
+ const char *fcn_name);
+
+ static
+ void
+ post_call_fcn(struct i965_batchbuffer_logger_app *pthis,
+ unsigned int call_id);
+
+ static
+ void
+ end_logging_fcn(struct i965_batchbuffer_logger_app *pthis);
+
+ static
+ void
+ begin_logging_fcn(struct i965_batchbuffer_logger_app *pthis,
+ const char *name);
+
+ static
+ void
+ release_app_fcn(struct i965_batchbuffer_logger_app *pthis);
+
+ static
+ uint32_t
+ default_batchbuffer_state_fcn(const struct i965_logged_batchbuffer *st)
+ {
+ return 0;
+ }
+
+ static
+ void
+ default_active_batchbuffer_fcn(struct i965_logged_batchbuffer *st)
+ {
+ st->fd = -1;
+ st->gem_bo = ~0u;
+ st->driver_data = nullptr;
+ }
+
+  /* derived from environment variables */
+ unsigned long m_start_log_call_number, m_end_log_call_number;
+ long m_max_file_size;
+ enum BatchbufferDecoder::decode_level_t m_decode_level;
+ enum BatchbufferDecoder::print_reloc_level_t m_print_reloc_level;
+ uint32_t m_shader_decode_flags;
+
+ /* from driver */
+ i965_logged_batchbuffer_state m_batchbuffer_state;
+ i965_active_batchbuffer m_active_batchbuffer;
+ int m_pci_id;
+
+ /* derived data from m_pci_id */
+ struct gen_device_info m_dev_info;
+ struct gen_spec *m_gen_spec;
+ struct gen_disasm *m_gen_disasm;
+
+ /* GEM buffer tracking, keyed by file descriptor */
+ std::map<int, GEMBufferTracker*> m_gem_buffer_trackers;
+
+ ShaderFileList m_shader_filelist;
+
+  /* mutex guaranteeing thread safety */
+ std::mutex m_mutex;
+
+ /* special dummy batchbuffer; markers are added
+   * to it if there is no active batchbuffer; the
+   * first time we get an active batchbuffer, the
+   * markers on the dummy are given to the
+   * BatchbufferLog associated with it.
+ */
+ BatchbufferLog m_dummy;
+ int m_number_aborted_batchbuffers;
+
+ /* output file. */
+ BatchbufferLoggerOutput m_file;
+};
+
+} //namespace
+
+/////////////////////////////////
+//BatchbufferLoggerOutput methods
+BatchbufferLoggerOutput::
+~BatchbufferLoggerOutput()
+{
+ close();
+}
+
+void
+BatchbufferLoggerOutput::
+open(const char *filename)
+{
+ close();
+ m_file = std::fopen(filename, "w");
+ if (m_file) {
+ m_filename = filename;
+ }
+}
+
+void
+BatchbufferLoggerOutput::
+close(void)
+{
+ if (m_file) {
+ clear_block_stack();
+ std::fclose(m_file);
+ m_file = nullptr;
+ m_filename.clear();
+ }
+}
+
+void
+BatchbufferLoggerOutput::
+begin_block(const char *txt)
+{
+ struct i965_batchbuffer_logger_header hdr;
+
+ ++m_current_block_level;
+ hdr.type = I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN;
+ hdr.name_length = std::strlen(txt);
+ hdr.value_length = 0;
+
+ std::fwrite(&hdr, sizeof(hdr), 1, m_file);
+ std::fwrite(txt, sizeof(char), hdr.name_length, m_file);
+}
+
+void
+BatchbufferLoggerOutput::
+begin_block_value(const char *txt, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vbegin_block_value(txt, fmt, args);
+ va_end(args);
+}
+
+void
+BatchbufferLoggerOutput::
+vbegin_block_value(const char *txt, const char *fmt, va_list va)
+{
+ ++m_current_block_level;
+ write_name_value(I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_BEGIN, txt, fmt,
+ va);
+}
+
+void
+BatchbufferLoggerOutput::
+end_block(void)
+{
+ if (m_current_block_level > 0) {
+ struct i965_batchbuffer_logger_header hdr;
+
+ hdr.type = I965_BATCHBUFFER_LOGGER_MESSAGE_BLOCK_END;
+ hdr.name_length = 0;
+ hdr.value_length = 0;
+ std::fwrite(&hdr, sizeof(hdr), 1, m_file);
+ --m_current_block_level;
+ }
+}
+
+void
+BatchbufferLoggerOutput::
+clear_block_stack(unsigned int desired_depth)
+{
+ while(m_current_block_level > desired_depth) {
+ end_block();
+ }
+}
+
+void
+BatchbufferLoggerOutput::
+print_value(const char *name, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vprint_value(name, fmt, args);
+ va_end(args);
+}
+
+void
+BatchbufferLoggerOutput::
+vprint_value(const char *name, const char *fmt, va_list va)
+{
+ write_name_value(I965_BATCHBUFFER_LOGGER_MESSAGE_VALUE, name, fmt, va);
+}
+
+void
+BatchbufferLoggerOutput::
+write_name_value(enum i965_batchbuffer_logger_message_type_t tp,
+ const char *name, const char *fmt,
+ va_list va)
+{
+ char buffer[4096];
+ struct i965_batchbuffer_logger_header hdr;
+ va_list va_value;
+
+ va_copy(va_value, va);
+ hdr.type = tp;
+ hdr.name_length = std::strlen(name);
+ hdr.value_length = std::vsnprintf(buffer, sizeof(buffer), fmt, va);
+
+ std::fwrite(&hdr, sizeof(hdr), 1, m_file);
+ std::fwrite(name, sizeof(char), hdr.name_length, m_file);
+  if (hdr.value_length >= sizeof(buffer)) {
+    std::vector<char> tmp(hdr.value_length + 1);
+    std::vsnprintf(&tmp[0], tmp.size(), fmt, va_value);
+    std::fwrite(&tmp[0], sizeof(char), hdr.value_length, m_file);
+  } else {
+    std::fwrite(buffer, sizeof(char), hdr.value_length, m_file);
+  }
+  va_end(va_value);
+}
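+
+/* Illustrative sketch (not part of this patch) of how a consumer can
+ * walk the emitted stream; each record is a header followed by the
+ * name bytes and then the value bytes:
+ *
+ *   struct i965_batchbuffer_logger_header hdr;
+ *   while (std::fread(&hdr, sizeof(hdr), 1, file) == 1) {
+ *     std::vector<char> name(hdr.name_length), value(hdr.value_length);
+ *     std::fread(name.data(), 1, name.size(), file);
+ *     std::fread(value.data(), 1, value.size(), file);
+ *     // hdr.type distinguishes block-begin/block-end/value records
+ *   }
+ */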
+
+template<typename F>
+void
+BatchbufferLoggerOutput::
+functor_print_value(const char *name, F f, bool pre_emit_eol)
+{
+ struct i965_batchbuffer_logger_header hdr;
+ long header_start, value_start, value_end;
+
+ hdr.type = I965_BATCHBUFFER_LOGGER_MESSAGE_VALUE;
+ hdr.name_length = std::strlen(name);
+ hdr.value_length = 0;
+
+ header_start = std::ftell(m_file);
+ std::fseek(m_file, sizeof(hdr), SEEK_CUR);
+ std::fwrite(name, sizeof(char), hdr.name_length, m_file);
+ value_start = std::ftell(m_file);
+
+ if (pre_emit_eol) {
+ fprintf(m_file, "\n");
+ }
+ f(m_file);
+
+ value_end = std::ftell(m_file);
+ hdr.value_length = value_end - value_start;
+ std::fseek(m_file, header_start, SEEK_SET);
+ std::fwrite(&hdr, sizeof(hdr), 1, m_file);
+ std::fseek(m_file, value_end, SEEK_SET);
+}
+
+////////////////////////////////////
+// APIStartCallMarker methods
+void
+APIStartCallMarker::
+print_ioctl_log(const std::list<std::string> &ioctl_log,
+ BatchbufferLoggerOutput &dst)
+{
+ if (dst && !ioctl_log.empty()) {
+ uint32_t ioctl_message_id;
+ std::list<std::string>::const_iterator iter;
+ for(ioctl_message_id = 0, iter = ioctl_log.begin();
+ iter != ioctl_log.end(); ++iter, ++ioctl_message_id) {
+ std::ostringstream name;
+ name << "IOCTL." << ioctl_message_id;
+ dst.print_value(name.str().c_str(), "%s", iter->c_str());
+ }
+ }
+}
+
+bool
+APIStartCallMarker::
+emit(uint32_t next_entry_start_bb_location,
+ BatchbufferLoggerOutput &dst, unsigned int top_level)
+{
+ bool return_value(m_print_element);
+
+ if (m_print_element) {
+ std::ostringstream str;
+
+ str << "Call." << m_call_id << "." << m_api_call;
+ if (next_entry_start_bb_location > m_start_bb_location) {
+ str << ".CreatedGPUCommands";
+ }
+ dst.clear_block_stack(top_level);
+ dst.begin_block(str.str().c_str());
+ dst.print_value("Call Number", "%d", m_call_id);
+ dst.print_value("Function", "%s", m_api_call.c_str());
+ dst.print_value("Details", "%s", m_api_call_details.c_str());
+ print_ioctl_log(m_ioctl_log, dst);
+ m_print_element = false;
+ }
+
+ return return_value;
+}
+
+//////////////////////////////////
+// GEMBufferObject methods
+GEMBufferObject::
+GEMBufferObject(int fd, const struct drm_i915_gem_create &pdata):
+ m_fd(fd),
+ m_handle(pdata.handle),
+ m_size(pdata.size),
+ m_user_ptr(nullptr),
+ m_gpu_address(0)
+{
+ struct drm_i915_gem_mmap map;
+ int ret;
+
+ std::memset(&map, 0, sizeof(map));
+ map.handle = m_handle;
+ map.offset = 0;
+ map.size = m_size;
+
+ ret = BatchbufferLogger::local_drm_ioctl(m_fd, DRM_IOCTL_I915_GEM_MMAP, &map);
+ if (ret != -1) {
+ m_mapped = (void*) map.addr_ptr;
+ } else {
+ m_mapped = nullptr;
+ }
+}
+
+GEMBufferObject::
+GEMBufferObject(int fd, const struct drm_i915_gem_userptr &pdata):
+ m_fd(fd),
+ m_handle(pdata.handle),
+ m_size(pdata.user_size),
+ m_user_ptr((const uint8_t*)pdata.user_ptr),
+ m_mapped((void*)pdata.user_ptr),
+ m_gpu_address(0)
+{
+}
+
+GEMBufferObject::
+~GEMBufferObject()
+{
+ if (m_mapped && m_mapped != m_user_ptr) {
+ munmap(m_mapped, m_size);
+ }
+}
+
+int
+GEMBufferObject::
+pread_buffer(void *dst, uint64_t start, uint64_t sz) const
+{
+ if (start + sz > m_size) {
+ return -1;
+ }
+
+ if (!m_user_ptr) {
+    struct drm_i915_gem_pread pread_args;
+    std::memset(&pread_args, 0, sizeof(pread_args));
+ pread_args.handle = m_handle;
+ pread_args.offset = start;
+ pread_args.size = sz;
+ pread_args.data_ptr = (__u64) dst;
+ return BatchbufferLogger::local_drm_ioctl(m_fd, DRM_IOCTL_I915_GEM_PREAD, &pread_args);
+ } else {
+ std::memcpy(dst, m_user_ptr + start, sz);
+ return 0;
+ }
+}
+
+///////////////////////////////
+// GPUCommandFieldValue methods
+GPUCommandFieldValue::
+GPUCommandFieldValue(const gen_field_iterator &iter):
+ m_gen_type(iter.field->type.kind)
+{
+  /* this code is essentially taken from gen_decoder.c's function
+   * gen_field_iterator_next(), but rather than printing the value
+   * to a string (iter.value), we extract the value to this
+   * object's fields.
+ */
+ union {
+ uint64_t qw;
+ float f;
+ } v;
+
+ if ((iter.field->end - iter.field->start) > 32) {
+ v.qw = ((uint64_t) iter.p[iter.dword + 1] << 32) | iter.p[iter.dword];
+  } else {
+ v.qw = iter.p[iter.dword];
+ }
+
+ switch (iter.field->type.kind) {
+ case gen_type::GEN_TYPE_INT:
+ m_value.i = field<int64_t>(v.qw, iter.field->start, iter.field->end);
+ break;
+ default:
+ case gen_type::GEN_TYPE_UINT:
+ case gen_type::GEN_TYPE_ENUM:
+ case gen_type::GEN_TYPE_UNKNOWN:
+ m_value.u = field<uint64_t>(v.qw, iter.field->start, iter.field->end);
+ break;
+ case gen_type::GEN_TYPE_BOOL:
+ m_value.b = field<bool>(v.qw, iter.field->start, iter.field->end);
+ break;
+ case gen_type::GEN_TYPE_FLOAT:
+ m_value.f = v.f;
+ break;
+ case gen_type::GEN_TYPE_ADDRESS:
+ case gen_type::GEN_TYPE_OFFSET:
+ m_value.u = field_address(v.qw, iter.field->start, iter.field->end);
+ break;
+ case gen_type::GEN_TYPE_UFIXED:
+ m_value.f = field<float>(v.qw, iter.field->start, iter.field->end) / float(1 << iter.field->type.f);
+ break;
+ case gen_type::GEN_TYPE_SFIXED: {
+ uint64_t uv;
+ bool is_negative;
+ uint64_t leading_bit;
+ uv = field<uint64_t>(v.qw, iter.field->start, iter.field->end);
+ leading_bit = iter.field->end - iter.field->start - 1;
+ is_negative = uv & (uint64_t(1) << leading_bit);
+ m_value.f = static_cast<float>(uv) / float(1 << iter.field->type.f);
+ if (is_negative) {
+ m_value.f = -m_value.f;
+ }
+ break;
+ }
+ }
+}
+
+template<typename T>
+T
+GPUCommandFieldValue::
+value(void) const
+{
+ switch(m_gen_type) {
+ case gen_type::GEN_TYPE_INT:
+ return static_cast<T>(m_value.i);
+
+ case gen_type::GEN_TYPE_BOOL:
+ return static_cast<T>(m_value.b);
+
+ case gen_type::GEN_TYPE_FLOAT:
+ case gen_type::GEN_TYPE_UFIXED:
+ case gen_type::GEN_TYPE_SFIXED:
+ return static_cast<T>(m_value.f);
+
+ case gen_type::GEN_TYPE_UINT:
+ case gen_type::GEN_TYPE_ENUM:
+ case gen_type::GEN_TYPE_UNKNOWN:
+ case gen_type::GEN_TYPE_ADDRESS:
+ case gen_type::GEN_TYPE_OFFSET:
+ default:
+ return static_cast<T>(m_value.u);
+ }
+}
+
+/////////////////////////////
+// GPUCommand methods
+GPUCommand::
+GPUCommand(void):
+ m_gem_bo(nullptr),
+ m_gem_bo_offset(-1),
+ m_inst(nullptr),
+ m_contents(nullptr),
+ m_dword_length(0),
+  m_command_type(gpu_command_show_value_without_gpu_state),
+  m_pipeline_type(gpu_pipeline_gfx)
+{}
+
+GPUCommand::
+GPUCommand(const GEMBufferObject *q, uint64_t dword_offset, struct gen_spec *spec, struct gen_group *grp):
+ m_gem_bo(q),
+ m_gem_bo_offset(dword_offset * sizeof(uint32_t)),
+ m_dword_length(0),
+ m_command_type(gpu_command_show_value_without_gpu_state),
+ m_pipeline_type(gpu_pipeline_gfx)
+{
+ complete_init(dword_offset, spec, grp);
+}
+
+GPUCommand::
+GPUCommand(const GPUAddressQuery &q, struct gen_spec *spec, struct gen_group *grp):
+ m_gem_bo(q.m_gem_bo),
+ m_gem_bo_offset(q.m_offset_into_gem_bo),
+ m_dword_length(0),
+ m_command_type(gpu_command_show_value_without_gpu_state),
+ m_pipeline_type(gpu_pipeline_gfx)
+{
+ complete_init(m_gem_bo_offset / sizeof(uint32_t), spec, grp);
+}
+
+void
+GPUCommand::
+complete_init(uint32_t dword_offset, struct gen_spec *spec, struct gen_group *grp)
+{
+ int length;
+
+ assert(sizeof(uint32_t) * dword_offset == m_gem_bo_offset);
+
+ m_contents = m_gem_bo->cpu_mapped<uint32_t>() + dword_offset;
+ if(spec && !grp) {
+ m_inst = gen_spec_find_instruction(spec, m_contents);
+ } else {
+ m_inst = grp;
+ }
+
+ if (m_inst) {
+ length = gen_group_get_length(m_inst, m_contents);
+ m_command_type = get_gpu_command_type(m_inst);
+ m_pipeline_type = get_gpu_pipeline_type(m_inst);
+
+ if (length > 0) {
+ m_dword_length = length;
+ }
+ }
+}
+
+template<typename T>
+bool
+GPUCommand::
+extract_field_value(const char *pname, T *dst) const
+{
+ struct gen_field_iterator iter;
+
+ gen_field_iterator_init(&iter, inst(), contents_ptr(), false);
+ while (gen_field_iterator_next(&iter)) {
+ if (!is_header_field(inst(), iter.field) &&
+ 0 == strcmp(pname, iter.name)) {
+ GPUCommandFieldValue value(iter);
+
+ assert(!m_archived_data.empty() ||
+ value.type() == gen_type::GEN_TYPE_ADDRESS);
+ *dst = value.value<T>();
+ return true;
+ }
+ }
+
+ return false;
+}
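+
+/* e.g. (sketch), mirroring the use in i965LatchState::update_state():
+ *
+ *   int v;
+ *   if (cmd.extract_field_value<int>("Maximum VP Index", &v)) {
+ *     viewport_count = v + 1; // hypothetical consumer of the field
+ *   }
+ */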
+
+enum GPUCommand::gpu_command_type_t
+GPUCommand::
+get_gpu_command_type(struct gen_group *inst)
+{
+ uint32_t op_code;
+ op_code = gen_group_get_opcode(inst);
+ switch (op_code) {
+ case _MI_LOAD_REGISTER_MEM: //load a register value from a GEM BO
+ case _MI_LOAD_REGISTER_IMM: //load a register value from batchbuffer
+ case _MI_LOAD_REGISTER_REG: //load a register value from another register
+ return gpu_command_set_register;
+
+ case STATE_BASE_ADDRESS:
+ /* because STATE_BASE_ADDRESS has option to set or not set values,
+ * it is not pure state and thus should be printed on encounter
+ */
+ case _3DSTATE_VF_INSTANCING:
+    /* _3DSTATE_VF_INSTANCING sets whether a named vertex attribute is
+ * instanced
+ */
+ case _MI_NOOP:
+ case _MI_BATCH_BUFFER_START:
+ case _MI_BATCH_BUFFER_END:
+ case _MI_STORE_REGISTER_MEM: //writes a register value to a GEM BO
+ case _MI_PREDICATE: //modify predicate value
+ case _MI_ARB_CHECK:
+ case _MI_ATOMIC:
+ case _MI_CLFLUSH:
+ case _MI_CONDITIONAL_BATCH_BUFFER_END:
+ case _MI_COPY_MEM_MEM:
+ case _MI_DISPLAY_FLIP:
+ case _MI_FORCE_WAKEUP:
+ case _MI_LOAD_SCAN_LINES_EXCL:
+ case _MI_LOAD_SCAN_LINES_INCL:
+ case _MI_MATH:
+ case _MI_REPORT_HEAD:
+ case _MI_REPORT_PERF_COUNT:
+ case _MI_RS_CONTEXT:
+ case _MI_RS_CONTROL:
+ case _MI_RS_STORE_DATA_IMM:
+ case _MI_SEMAPHORE_SIGNAL:
+ case _MI_SEMAPHORE_WAIT:
+ case _MI_SET_CONTEXT:
+ case _MI_SET_PREDICATE:
+ case _MI_STORE_DATA_IMM:
+ case _MI_STORE_DATA_INDEX:
+ case _MI_SUSPEND_FLUSH:
+ case _MI_UPDATE_GTT:
+ case _MI_USER_INTERRUPT:
+ case _MI_WAIT_FOR_EVENT:
+ case _3DSTATE_PIPE_CONTROL: //3d pipeline flushing
+ case MEDIA_STATE_FLUSH: //compute/media pipeline flushing
+ case _3DSTATE_PIPELINE_SELECT:
+ case _3DSTATE_PIPELINE_SELECT_GM45:
+ return gpu_command_show_value_without_gpu_state;
+
+ case _3DPRIMITIVE:
+ case _GPGPU_WALKER:
+ return gpu_command_show_value_with_gpu_state;
+
+ default:
+ /* TODO: go through state values and correctly tag
+ * what state is part of HW context and what is not.
+ */
+ return gpu_command_save_value_as_state_hw_context;
+ }
+}
+
+enum GPUCommand::gpu_pipeline_type_t
+GPUCommand::
+get_gpu_pipeline_type(struct gen_group *inst)
+{
+ uint32_t op_code;
+ op_code = gen_group_get_opcode(inst);
+ switch (op_code) {
+ case _GPGPU_WALKER:
+ case MEDIA_INTERFACE_DESCRIPTOR_LOAD:
+ case MEDIA_VFE_STATE:
+ case MEDIA_CURBE_LOAD:
+ return gpu_pipeline_compute;
+ default:
+ return gpu_pipeline_gfx;
+  }
+}
+
+uint64_t
+GPUCommand::
+get_gpu_address(const BatchRelocs &relocs,
+ uint64_t dword_offset_from_cmd_start,
+ bool ignore_lower_12_bits) const
+{
+ const uint32_t *p;
+ const GEMBufferObject *gem;
+ uint64_t dword_offset_from_gem_start;
+
+ p = contents_ptr() + dword_offset_from_cmd_start;
+
+  /* recycle the logic in BatchRelocs::get_gpu_address()
+   * for reading a GPU address from memory; if the data is
+   * archived, set the passed GEM BO and offset to values
+   * that will never be in the reloc data.
+ */
+ gem = (m_archived_data.empty()) ? gem_bo() : nullptr;
+ dword_offset_from_gem_start = (gem) ?
+ dword_offset_from_cmd_start + dword_offset() :
+ ~uint64_t(0);
+
+ return relocs.get_gpu_address(gem, dword_offset_from_gem_start,
+ p, ignore_lower_12_bits);
+}
+
+void
+GPUCommand::
+archive_data(const BatchRelocs &relocs)
+{
+ assert(!is_archived());
+ if (m_dword_length > 0) {
+ m_archived_data.resize(m_dword_length);
+ std::copy(m_contents, m_contents + m_dword_length,
+ m_archived_data.begin());
+ relocs.place_relocation_values_into_buffer(m_gem_bo, m_gem_bo_offset,
+ &m_archived_data);
+ m_contents = &m_archived_data[0];
+ }
+}
+
+//////////////////////////////////////////
+// i965LatchState methods
+i965LatchState::
+i965LatchState(void):
+ m_general_state_base_address(0),
+ m_surface_state_base_address(0),
+ m_dynamic_state_base_address(0),
+ m_instruction_base_address(0),
+ m_VIEWPORT_count(-1)
+{}
+
+void
+i965LatchState::
+update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &cmd)
+{
+ GPUCommand::state_key op_code;
+ const GPUCommand *p(&cmd);
+ GPUCommand archived;
+
+ if (!cmd.is_archived()) {
+ archived = cmd;
+ archived.archive_data(decoder->relocs());
+ p = &archived;
+ }
+
+ const GPUCommand &q(*p);
+
+ op_code = gen_group_get_opcode(q.inst());
+ switch(op_code) {
+ case _3DSTATE_VS:
+ update_stage_values(decoder, pfile, q, &m_VS);
+ break;
+ case _3DSTATE_HS:
+ update_stage_values(decoder, pfile, q, &m_HS);
+ break;
+ case _3DSTATE_DS:
+ update_stage_values(decoder, pfile, q, &m_DS);
+ break;
+ case _3DSTATE_GS:
+ update_stage_values(decoder, pfile, q, &m_GS);
+ break;
+ case _3DSTATE_PS:
+ update_stage_values(decoder, pfile, q, &m_PS);
+ break;
+ case STATE_BASE_ADDRESS:
+ update_state_base_address(decoder, pfile, q);
+ break;
+ case _3D_STATE_CLIP: {
+ /* TODO: for GEN5 and before, the maximum number of
+     * viewports is in _3D_STATE_GS
+ */
+ int v;
+ if (q.extract_field_value<int>("Maximum VP Index", &v)) {
+ m_VIEWPORT_count = v + 1;
+ }
+ break;
+ }
+ }
+}
+
+void
+i965LatchState::
+update_stage_values(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q, per_stage_values *dst)
+{
+ int tmp;
+ if (q.extract_field_value<int>("Sampler Count", &tmp)) {
+    /* 3D_STATE_XS holds the number of samplers divided by 4;
+     * the awful consequence is that we then only know the
+     * number of sampler states up to a multiple of 4.
+ */
+ dst->m_sampler_count = 4 * tmp;
+ }
+
+ if (q.extract_field_value<int>("Binding Table Entry Count", &tmp)) {
+ dst->m_binding_table_count = tmp;
+ }
+}
+
+void
+i965LatchState::
+update_state_base_address_helper(const GPUCommand &q,
+ const char *value_enabled_name,
+ uint64_t *dst, const char *value_name)
+{
+ bool enabled(false);
+ uint64_t v;
+
+ q.extract_field_value<bool>(value_enabled_name, &enabled);
+ if (enabled && q.extract_field_value<uint64_t>(value_name, &v)) {
+ *dst = v & ~uint64_t(0xFFFu);
+ }
+}
+
+void
+i965LatchState::
+update_state_base_address(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q)
+{
+ assert(q.is_archived());
+ update_state_base_address_helper(q,
+ "General State Base Address Modify Enable",
+ &m_general_state_base_address,
+ "General State Base Address");
+
+ update_state_base_address_helper(q,
+ "Surface State Base Address Modify Enable",
+ &m_surface_state_base_address,
+ "Surface State Base Address");
+
+ update_state_base_address_helper(q,
+ "Dynamic State Base Address Modify Enable",
+ &m_dynamic_state_base_address,
+ "Dynamic State Base Address");
+
+ update_state_base_address_helper(q,
+ "Instruction Base Address Modify Enable",
+ &m_instruction_base_address,
+ "Instruction Base Address");
+}
+
+///////////////////////////////////////////
+// i965Registers methods
+void
+i965Registers::
+update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q)
+{
+ GPUCommand::state_key op_code;
+
+ op_code = gen_group_get_opcode(q.inst());
+ switch (op_code) {
+ case _MI_LOAD_REGISTER_MEM: {
+      /* A LOAD_REGISTER_MEM command loads the register from a
+       * GEM BO, so we need to get the register value from the
+       * GEM BO that holds it. DANGER: we are reading the value
+       * from the GEM BO after the ioctl returns; if the GEM BO
+       * was written to later in the batchbuffer, then our read
+       * gives the value after everything was done, not the
+       * value at the time the command was used.
+       *
+       * Should we instead record the location and offset of
+       * the value?
+       */
+ uint32_t register_offset, register_value;
+ uint64_t gpu_address;
+
+ register_offset = q[1];
+ gpu_address = q.get_gpu_address(decoder->relocs(), 2, false);
+      if (decoder->tracker().pread_buffer(&register_value, gpu_address,
+                                          sizeof(uint32_t)) != -1) {
+         m_register_values[register_offset] = register_value;
+      }
+ break;
+ }
+
+ case _MI_LOAD_REGISTER_IMM: {
+      /* An IMM load has the values for the registers stored
+       * directly in the batchbuffer; a single command can set
+       * multiple registers.
+       */
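+      /* For illustration: an MI_LOAD_REGISTER_IMM that writes two
+       * registers occupies five DWORDs:
+       *   q[0] = header (opcode | DWORD length)
+       *   q[1] = register offset #0,  q[2] = value #0
+       *   q[3] = register offset #1,  q[4] = value #1
+       */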
+      for (unsigned int i = 1, endi = q.contents_size(); i + 1 < endi; i += 2) {
+ uint32_t register_offset, register_value;
+
+ register_offset = q[i];
+ register_value = q[i + 1];
+ m_register_values[register_offset] = register_value;
+ }
+ break;
+ }
+
+ case _MI_LOAD_REGISTER_REG: {
+      /* this command copies the value of one register to another */
+ uint32_t register_src_offset, register_dst_offset;
+ register_src_offset = q[1];
+ register_dst_offset = q[2];
+ m_register_values[register_dst_offset] = m_register_values[register_src_offset];
+ break;
+ }
+ }
+}
+
+void
+i965Registers::
+decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile)
+{
+   /* TODO: classify registers by which part(s) of the
+    * pipeline they influence
+    */
+ (void)pipeline;
+
+ pfile.begin_block("Register Values");
+   for(const auto &v : m_register_values) {
+ struct gen_group *reg;
+ reg = gen_spec_find_register(decoder->spec(), v.first);
+
+ if (reg) {
+ pfile.begin_block_value("Register", "%s", reg->name);
+ pfile.print_value("ID", "(0x%x)", v.first);
+ pfile.print_value("value", "0x%x", v.second);
+ } else {
+ pfile.begin_block_value("Unknown register", "(0x%x)", v.first);
+ pfile.print_value("ID", "(0x%x)", v.first);
+ pfile.print_value("value", "0x%x", v.second);
+ }
+ pfile.end_block();
+ }
+ pfile.end_block();
+}
+
+///////////////////////////////////////////////
+// i965HWContextData methods
+i965HWContextData::
+i965HWContextData(uint32_t ctx_id):
+ m_ctx_id(ctx_id)
+{
+}
+
+i965HWContextData::
+~i965HWContextData()
+{
+}
+
+void
+i965HWContextData::
+update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q)
+{
+ enum GPUCommand::gpu_command_type_t tp;
+ const GPUCommand *pq(&q);
+
+ tp = q.gpu_command_type();
+
+ switch (tp) {
+ case GPUCommand::gpu_command_save_value_as_state_hw_context: {
+ uint32_t op_code;
+ op_code = gen_group_get_opcode(q.inst());
+
+ GPUCommand &dst(m_state[op_code]);
+ dst = q;
+ dst.archive_data(decoder->relocs());
+ pq = &dst;
+ break;
+ }
+
+ case GPUCommand::gpu_command_set_register: {
+      /* TODO: not all registers are part of context state; some
+       * are global to the entire GPU. Eventually we need to
+       * address that issue.
+       */
+ m_registers.update_state(decoder, pfile, q);
+ break;
+ }
+
+ default:
+ /* TODO: should we track the values set by _3DSTATE_VF_INSTANCING? */
+ break;
+ }
+ m_latch_state.update_state(decoder, pfile, *pq);
+}
+
+void
+i965HWContextData::
+decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile)
+{
+ pfile.begin_block("State of Context");
+   for(const auto &entry : m_state) {
+ if (entry.second.gpu_pipeline_type() == pipeline) {
+ decoder->decode_gpu_command(pfile, entry.second);
+ }
+ }
+ m_registers.decode_contents(decoder, pipeline, pfile);
+ pfile.end_block();
+}
+
+//////////////////////////////////////
+// GPUState methods
+void
+GPUState::
+update_state(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q)
+{
+ if (q.gpu_command_type() ==
+ GPUCommand::gpu_command_save_value_as_state_not_hw_context) {
+ GPUCommand::state_key op_code;
+ op_code = gen_group_get_opcode(q.inst());
+
+ GPUCommand &dst(m_state[op_code]);
+ dst = q;
+ dst.archive_data(decoder->relocs());
+ } else {
+ m_ctx_data->update_state(decoder, pfile, q);
+ }
+}
+
+void
+GPUState::
+decode_contents(BatchbufferDecoder *decoder,
+ enum GPUCommand::gpu_pipeline_type_t pipeline,
+ BatchbufferLoggerOutput &pfile)
+{
+ m_ctx_data->decode_contents(decoder, pipeline, pfile);
+ if (!m_state.empty()) {
+ pfile.begin_block("State of GPU, not of Context");
+      for(const auto &entry : m_state) {
+ if (entry.second.gpu_pipeline_type() == pipeline) {
+ decoder->decode_gpu_command(pfile, entry.second);
+ }
+ }
+ pfile.end_block();
+ }
+}
+
+///////////////////////////////////////////////
+// BatchbufferDecoder::DetailedDecoder methods
+BatchbufferDecoder::DetailedDecoder::
+DetailedDecoder(void)
+{
+ m_elements[MEDIA_INTERFACE_DESCRIPTOR_LOAD] =
+ &BatchbufferDecoder::decode_media_interface_descriptor_load;
+ m_elements[_3DSTATE_VS] = &BatchbufferDecoder::decode_3dstate_xs;
+ m_elements[_3DSTATE_GS] = &BatchbufferDecoder::decode_3dstate_xs;
+ m_elements[_3DSTATE_DS] = &BatchbufferDecoder::decode_3dstate_xs;
+ m_elements[_3DSTATE_HS] = &BatchbufferDecoder::decode_3dstate_xs;
+ m_elements[_3DSTATE_PS] = &BatchbufferDecoder::decode_3dstate_ps;
+
+ m_elements[_3DSTATE_BINDING_TABLE_POINTERS_VS] =
+ &BatchbufferDecoder::decode_3dstate_binding_table_pointers_vs;
+ m_elements[_3DSTATE_BINDING_TABLE_POINTERS_HS] =
+ &BatchbufferDecoder::decode_3dstate_binding_table_pointers_hs;
+ m_elements[_3DSTATE_BINDING_TABLE_POINTERS_DS] =
+ &BatchbufferDecoder::decode_3dstate_binding_table_pointers_ds;
+ m_elements[_3DSTATE_BINDING_TABLE_POINTERS_GS] =
+ &BatchbufferDecoder::decode_3dstate_binding_table_pointers_gs;
+ m_elements[_3DSTATE_BINDING_TABLE_POINTERS_PS] =
+ &BatchbufferDecoder::decode_3dstate_binding_table_pointers_ps;
+
+ m_elements[_3DSTATE_SAMPLER_STATE_POINTERS_VS] =
+ &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_vs;
+   m_elements[_3DSTATE_SAMPLER_STATE_POINTERS_DS] =
+      &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_ds;
+   m_elements[_3DSTATE_SAMPLER_STATE_POINTERS_HS] =
+      &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_hs;
+ m_elements[_3DSTATE_SAMPLER_STATE_POINTERS_GS] =
+ &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_gs;
+ m_elements[_3DSTATE_SAMPLER_STATE_POINTERS_PS] =
+ &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_ps;
+ m_elements[_3DSTATE_SAMPLER_STATE_POINTERS] =
+ &BatchbufferDecoder::decode_3dstate_sampler_state_pointers_gen6;
+
+ m_elements[_3DSTATE_VIEWPORT_STATE_POINTERS_CC] =
+ &BatchbufferDecoder::decode_3dstate_viewport_state_pointers_cc;
+ m_elements[_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] =
+ &BatchbufferDecoder::decode_3dstate_viewport_state_pointers_sf_clip;
+ m_elements[_3DSTATE_BLEND_STATE_POINTERS] =
+ &BatchbufferDecoder::decode_3dstate_blend_state_pointers;
+ m_elements[_3DSTATE_CC_STATE_POINTERS] =
+ &BatchbufferDecoder::decode_3dstate_cc_state_pointers;
+ m_elements[_3DSTATE_SCISSOR_STATE_POINTERS] =
+ &BatchbufferDecoder::decode_3dstate_scissor_state_pointers;
+}
+
+void
+BatchbufferDecoder::DetailedDecoder::
+decode(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ static DetailedDecoder R;
+ std::map<uint32_t, fcn>::const_iterator iter;
+ uint32_t opcode;
+
+ opcode = gen_group_get_opcode(data.inst());
+ iter = R.m_elements.find(opcode);
+ if (iter != R.m_elements.end()) {
+ fcn function(iter->second);
+ (decoder->*function)(pfile, data);
+ }
+}
+
+//////////////////////////////////////////////
+// BatchRelocs methods
+void
+BatchRelocs::
+emit_reloc_data(BatchbufferLoggerOutput &pfile)
+{
+ pfile.begin_block("Relocs");
+ for(const auto &v : m_relocs) {
+
+ if(v.second.empty()) {
+ continue;
+ }
+
+ pfile.begin_block("Relocs on GEM");
+ pfile.print_value("GEM BO", "%u", v.first->handle());
+ for(const auto &w : v.second) {
+ pfile.begin_block("Reloc Entry");
+ pfile.print_value("Offset", "%0x012" PRIx64, w.first);
+ pfile.print_value("GPU Address", "%0x012" PRIx64, w.second);
+ pfile.end_block();
+ }
+ pfile.end_block();
+ }
+ pfile.end_block();
+}
+
+void
+BatchRelocs::
+place_relocation_values_into_buffer(const GEMBufferObject *gem, uint64_t gem_bo_offset,
+ std::vector<uint32_t> *dst) const
+{
+ reloc_map::const_iterator gem_iter;
+ reloc_map_of_gem_bo::const_iterator reloc_iter;
+   uint64_t dst_end;
+
+ gem_iter = m_relocs.find(gem);
+
+ if (gem_iter == m_relocs.end()) {
+ return;
+ }
+
+ dst_end = sizeof(uint32_t) * dst->size() + gem_bo_offset;
+
+ for(reloc_iter = gem_iter->second.lower_bound(gem_bo_offset);
+ reloc_iter != gem_iter->second.end() && reloc_iter->first < dst_end;
+ ++reloc_iter)
+ {
+ unsigned int s;
+ uint64_t addr;
+
+ addr = reloc_iter->second;
+
+ assert(reloc_iter->first >= gem_bo_offset);
+ s = reloc_iter->first - gem_bo_offset;
+
+      /* Recall that the reloc locations in BatchRelocs are copied
+       * directly from the kernel and are in units of bytes,
+       * not DWORDs.
+       */
+ assert(s % sizeof(uint32_t) == 0);
+ s /= sizeof(uint32_t);
+ assert(s < dst->size());
+
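+      /* Illustration: a 48-bit GPU address 0x0000123456789000 is
+       * written back as
+       *   (*dst)[s]     = 0x56789000                        (low 32 bits)
+       *   (*dst)[s + 1] = (old & 0xFFFF0000u) | 0x00001234  (high 16 bits)
+       */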
+ (*dst)[s] = addr & 0xFFFFFFFF;
+ if (!m_32bit_gpu_addresses) {
+ assert(s + 1 < dst->size());
+         /* preserve the high 16 bits of the next DWORD, since
+          * the address is only 48 bits wide and additional data
+          * may be stashed in those highest 16 bits.
+          */
+ (*dst)[s + 1] &= 0xFFFF0000u;
+ (*dst)[s + 1] |= (addr >> 32u) & 0x0000FFFFu;
+ }
+ }
+}
+
+uint64_t
+BatchRelocs::
+get_gpu_address(const GEMBufferObject *q, uint64_t dword_offset,
+ const uint32_t *p, bool ignore_lower_12_bits) const
+{
+ reloc_map::const_iterator gem_iter;
+
+ uint64_t addr = p[0];
+ if (!m_32bit_gpu_addresses) {
+      /* On BDW and above, the address is 48 bits wide: the low
+       * 32 bits are in p[0]; grab the next 16 bits of the
+       * address from p[1].
+       */
+ addr |= uint64_t(p[1] & 0xFFFF) << uint64_t(32);
+ }
+
+ gem_iter = m_relocs.find(q);
+ if (gem_iter != m_relocs.end()) {
+ reloc_map_of_gem_bo::const_iterator reloc_iter;
+ reloc_iter = gem_iter->second.find(sizeof(uint32_t) * dword_offset);
+ if (reloc_iter != gem_iter->second.end()) {
+ addr = reloc_iter->second;
+ }
+ }
+
+   /* Addresses are page aligned (i.e. the last 12 bits are zero),
+    * but HW commands might stash extra data in those 12 bits;
+    * zero those bits out when asked.
+    */
+ return ignore_lower_12_bits ?
+ addr & ~uint64_t(0xFFFu) :
+ addr;
+}
+
+///////////////////////////////////////////////
+// BatchbufferDecoder methods
+BatchbufferDecoder::
+BatchbufferDecoder(enum decode_level_t decode_level,
+ enum print_reloc_level_t print_reloc_level,
+ uint32_t shader_decode_flags,
+ struct gen_spec *spec, struct gen_disasm *dis,
+ int pciid, GEMBufferTracker *tracker,
+ ShaderFileList *shader_filelist,
+ struct drm_i915_gem_execbuffer2 *execbuffer2):
+ m_decode_level(decode_level),
+ m_print_reloc_level(print_reloc_level),
+ m_shader_decode_flags(shader_decode_flags),
+ m_spec(spec),
+ m_gen_disasm(dis),
+ m_pci_id(pciid),
+ m_tracker(tracker),
+ m_shader_filelist(shader_filelist),
+ m_buffers(execbuffer2->buffer_count),
+ m_reloc_handles_are_indices(execbuffer2->flags & I915_EXEC_HANDLE_LUT),
+ m_gpu_state(m_tracker->fetch_hw_context(execbuffer2->rsvd1)),
+ m_relocs(spec),
+ m_execbuffer2(execbuffer2)
+{
+ struct drm_i915_gem_exec_object2 *exec_objects;
+
+ exec_objects = (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr;
+ for(unsigned int i = 0; i < execbuffer2->buffer_count; ++i) {
+ m_buffers[i] = m_tracker->fetch_gem_bo(exec_objects[i].handle);
+ }
+
+ if (execbuffer2->flags & I915_EXEC_BATCH_FIRST) {
+ m_batchbuffer = m_buffers.front();
+ } else {
+ m_batchbuffer = m_buffers.back();
+ }
+
+ m_batchbuffer_log = m_tracker->fetch_or_create(nullptr, m_batchbuffer->handle());
+ assert(m_batchbuffer_log);
+ for(unsigned int i = 0; i < execbuffer2->buffer_count; ++i) {
+ std::pair<bool, GEMBufferObject*> q;
+ std::ostringstream pstr;
+
+ q = tracker->update_gem_bo_gpu_address(&exec_objects[i]);
+ if (!q.second) {
+ continue;
+ }
+
+      /* Bah humbug; the kernel interface does not guarantee that
+       * the address values in a batchbuffer get updated; the
+       * upshot is that we need to examine the reloc data of the
+       * ioctl call.
+       */
+ struct drm_i915_gem_relocation_entry *reloc_entries;
+
+      reloc_entries = (struct drm_i915_gem_relocation_entry*)
+         (uintptr_t) exec_objects[i].relocs_ptr;
+ for (unsigned int r = 0; r < exec_objects[i].relocation_count; ++r) {
+ uint32_t gem_bo_handle;
+ GEMBufferObject *bo;
+ uint64_t gpu_address;
+
+ gem_bo_handle = reloc_entries[r].target_handle;
+ if (execbuffer2->flags & I915_EXEC_HANDLE_LUT) {
+ gem_bo_handle = exec_objects[gem_bo_handle].handle;
+ }
+
+ bo = m_tracker->fetch_gem_bo(gem_bo_handle);
+ if (!bo) {
+ continue;
+ }
+
+ gpu_address = bo->gpu_address_begin() + reloc_entries[r].delta;
+      /* When reading an address at this offset from the BO, we
+       * will read the gpu_address computed above.
+       */
+ m_relocs.add_entry(q.second, reloc_entries[r].offset, gpu_address);
+ }
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_shader(BatchbufferLoggerOutput &pfile, enum shader_decode_entry_t tp,
+ uint64_t gpu_address)
+{
+ const void *shader;
+ GPUAddressQuery query;
+ static const char *labels[shader_decode_entry_count] = {
+ [shader_decode_vs] = "Vertex Shader",
+ [shader_decode_hs] = "Hull (tessellation control) Shader",
+      [shader_decode_ds] = "Domain (tessellation evaluation) Shader",
+ [shader_decode_gs] = "Geometry Shader",
+ [shader_decode_ps_8] = "8-Pixel Shader",
+ [shader_decode_ps_16] = "16-Pixel Shader",
+ [shader_decode_ps_32] = "32-Pixel Shader",
+ [shader_decode_media_compute] = "Media/Compute Shader",
+ };
+
+ pfile.begin_block(labels[tp]);
+
+ shader = m_tracker->cpu_mapped<void>(gpu_address, &query);
+ pfile.print_value("GPU Address", "0x%012" PRIx64, gpu_address);
+ if (shader && query.m_gem_bo) {
+ if (m_shader_decode_flags & (1u << tp)) {
+ pfile.functor_print_value("Assembly",
+ std::bind(gen_disasm_disassemble,
+ m_gen_disasm,
+ shader, 0,
+ std::placeholders::_1),
+ true);
+ } else {
+ const char *filename;
+ filename = m_shader_filelist->filename(pfile.filename(), shader,
+ m_pci_id, m_gen_disasm);
+ if (filename) {
+ pfile.print_value("ShaderFile", "%s", filename);
+ }
+ }
+ } else {
+ pfile.print_value("GPU Address", "0x%012 (BAD)" PRIx64, gpu_address);
+ }
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+emit_log(BatchbufferLoggerOutput &pfile)
+{
+ assert(m_batchbuffer_log);
+
+ bool print_ioctl;
+
+ print_ioctl = m_batchbuffer_log->emit_log(this, pfile, m_execbuffer2->batch_len / 4);
+
+ /* Only print the drmIoctl command details if
+ * the last APIStartCallMarker was printable
+ */
+ if (print_ioctl && pfile) {
+ pfile.begin_block("drmIoctl(execbuffer2)");
+ pfile.print_value("length", "%d bytes", m_execbuffer2->batch_len);
+ pfile.print_value("length", "%d dwords", m_execbuffer2->batch_len / 4);
+ pfile.print_value("start", "%d", m_execbuffer2->batch_start_offset);
+ pfile.print_value("fd", "%d", m_batchbuffer_log->src()->fd);
+ pfile.print_value("GEM BO", "%u", m_batchbuffer_log->src()->gem_bo);
+
+ if (m_print_reloc_level >= print_reloc_gem_gpu_updates) {
+ m_relocs.emit_reloc_data(pfile);
+ }
+ pfile.end_block();
+ }
+}
+
+
+void
+BatchbufferDecoder::
+decode_media_interface_descriptor_load(BatchbufferLoggerOutput &pfile, const GPUCommand &data)
+{
+ struct gen_group *grp;
+ uint64_t gpu_address;
+
+ grp = gen_spec_find_struct(m_spec, "INTERFACE_DESCRIPTOR_DATA");
+ if (!grp) {
+ return;
+ }
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + data[3];
+ for(int i = 0, length = data[2] / 32; i < length; ++i,
+ gpu_address += 8 * sizeof(uint32_t)) {
+ GPUAddressQuery address_query(m_tracker->get_memory_at_gpu_address(gpu_address));
+ GPUCommand descriptor(address_query, m_spec, grp);
+ uint64_t shader_gpu_address;
+ int tmp, binding_table_count, sampler_count;
+
+ pfile.begin_block_value("Descriptor", "#%d", i);
+ pfile.print_value("GPU Address", "%012" PRIx64, gpu_address);
+ decode_gen_group(pfile, descriptor.gem_bo(), descriptor.dword_offset(),
+ descriptor.contents_ptr(), descriptor.inst());
+
+ shader_gpu_address = m_gpu_state.ctx().m_latch_state.m_instruction_base_address + descriptor[0];
+
+ /* ISSUE: When decoding from UFO, we get crashes on Media/Compute
+ * shader decode from within gen_disasm_disassemble().
+ */
+ decode_shader(pfile, shader_decode_media_compute, shader_gpu_address);
+
+ sampler_count = -1;
+ if (descriptor.extract_field_value<int>("Sampler Count", &tmp)) {
+ sampler_count = 4 * tmp;
+ }
+
+ binding_table_count = -1;
+ if (descriptor.extract_field_value<int>("Binding Table Entry Count", &tmp)) {
+ binding_table_count = tmp;
+ }
+
+ decode_3dstate_sampler_state_pointers_helper(pfile,
+ descriptor[3] & ~0x1f,
+ sampler_count);
+ decode_3dstate_binding_table_pointers(pfile, "MEDIA",
+ descriptor[4] & ~0x1f,
+ binding_table_count);
+
+ pfile.end_block();
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_xs(BatchbufferLoggerOutput &pfile, const GPUCommand &data)
+{
+ bool has_shader(false);
+ uint64_t offset(0), gpu_address;
+ uint32_t opcode;
+ enum shader_decode_entry_t shader_tp;
+
+ data.extract_field_value<bool>("Enable", &has_shader);
+ has_shader = has_shader
+ && data.extract_field_value<uint64_t>("Kernel Start Pointer", &offset);
+
+ if(!has_shader) {
+ return;
+ }
+
+ opcode = gen_group_get_opcode(data.inst());
+ switch(opcode) {
+ default:
+ case _3DSTATE_VS:
+ shader_tp = shader_decode_vs;
+ break;
+ case _3DSTATE_HS:
+ shader_tp = shader_decode_hs;
+ break;
+ case _3DSTATE_DS:
+ shader_tp = shader_decode_ds;
+ break;
+ case _3DSTATE_GS:
+ shader_tp = shader_decode_gs;
+ break;
+ }
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_instruction_base_address + offset;
+ decode_shader(pfile, shader_tp, gpu_address);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_ps(BatchbufferLoggerOutput &pfile, const GPUCommand &data)
+{
+ typedef std::pair<enum shader_decode_entry_t, const char*> decode_job;
+ std::vector<decode_job> decode_jobs;
+ bool has_8(false), has_16(false), has_32(false);
+   int num_enabled;
+ static const char *kernels[3] = {
+ "Kernel Start Pointer 0",
+ "Kernel Start Pointer 1",
+ "Kernel Start Pointer 2",
+ };
+
+ data.extract_field_value<bool>("8 Pixel Dispatch Enable", &has_8);
+ data.extract_field_value<bool>("16 Pixel Dispatch Enable", &has_16);
+ data.extract_field_value<bool>("32 Pixel Dispatch Enable", &has_32);
+
+   /* GEN is amusing at times: depending on which dispatches are
+    * enabled, the kernel used for each dispatch mode changes.
+    *
+    * | 8-enabled | 16-enabled | 32-enabled | 8-shader | 16-shader | 32-shader |
+    * |   TRUE    |   FALSE    |   FALSE    | Kernel0  |           |           |
+    * |   TRUE    |   TRUE     |   FALSE    | Kernel0  | Kernel2   |           |
+    * |   TRUE    |   TRUE     |   TRUE     | Kernel0  | Kernel2   | Kernel1   |
+    * |   FALSE   |   TRUE     |   FALSE    |          | Kernel0   |           |
+    * |   FALSE   |   FALSE    |   TRUE     |          |           | Kernel0   |
+    * |   FALSE   |   TRUE     |   TRUE     |          | Kernel2   | Kernel1   |
+    *
+    * From the table we can extract a simple set of rules:
+    *  - 8-wide, if enabled, is always at Kernel0
+    *  - if N-wide is the only mode enabled, it is at Kernel0
+    *  - if at least two modes are enabled, then 16-wide is at
+    *    Kernel2 and 32-wide is at Kernel1.
+    */
+   num_enabled = int(has_8) + int(has_16) + int(has_32);
+ if (has_8) {
+ decode_jobs.push_back(decode_job(shader_decode_ps_8, kernels[0]));
+ }
+
+   if (num_enabled > 1) {
+ if (has_16) {
+ decode_jobs.push_back(decode_job(shader_decode_ps_16, kernels[2]));
+ }
+ if (has_32) {
+ decode_jobs.push_back(decode_job(shader_decode_ps_32, kernels[1]));
+ }
+ } else {
+ if (has_16) {
+ decode_jobs.push_back(decode_job(shader_decode_ps_16, kernels[0]));
+      } else if (has_32) {
+ decode_jobs.push_back(decode_job(shader_decode_ps_32, kernels[0]));
+ }
+ }
+
+ for (const decode_job &J : decode_jobs) {
+ uint64_t addr;
+ if (data.extract_field_value<uint64_t>(J.second, &addr)) {
+ addr += m_gpu_state.ctx().m_latch_state.m_instruction_base_address;
+ decode_shader(pfile, J.first, addr);
+ }
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_constant(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+   /* TODO: decode the push-constant buffer contents */
+   (void)pfile;
+   (void)data;
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers_vs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ decode_3dstate_binding_table_pointers(pfile, "VS", data[1] & ~0x1fu,
+ m_gpu_state.ctx().m_latch_state.m_VS.m_binding_table_count);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers_ds(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ decode_3dstate_binding_table_pointers(pfile, "DS", data[1] & ~0x1fu,
+ m_gpu_state.ctx().m_latch_state.m_DS.m_binding_table_count);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers_hs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ decode_3dstate_binding_table_pointers(pfile, "HS", data[1] & ~0x1fu,
+ m_gpu_state.ctx().m_latch_state.m_HS.m_binding_table_count);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers_ps(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ decode_3dstate_binding_table_pointers(pfile, "PS", data[1] & ~0x1fu,
+ m_gpu_state.ctx().m_latch_state.m_PS.m_binding_table_count);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers_gs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ decode_3dstate_binding_table_pointers(pfile, "GS", data[1] & ~0x1fu,
+ m_gpu_state.ctx().m_latch_state.m_GS.m_binding_table_count);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_binding_table_pointers(BatchbufferLoggerOutput &pfile,
+ const std::string &label, uint32_t offset,
+ int cnt)
+{
+ struct gen_group *surface_state;
+ uint64_t gpu_address;
+ GPUAddressQuery Q;
+ const uint32_t *v;
+
+   /* The command essentially just provides an address (given as an
+    * offset from surface_state_base_address) of a sequence of values V.
+    * Each element of V is in turn an offset from
+    * surface_state_base_address giving the location of a surface
+    * state value.
+    */
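+   /* Illustration with hypothetical values: if
+    * surface_state_base_address = 0x1000000 and offset = 0x40, then
+    * V lives at GPU address 0x1000040; if in addition V[0] = 0x80,
+    * the first RENDER_SURFACE_STATE lives at GPU address 0x1000080.
+    */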
+ surface_state = gen_spec_find_struct(m_spec, "RENDER_SURFACE_STATE");
+ gpu_address = offset + m_gpu_state.ctx().m_latch_state.m_surface_state_base_address;
+ v = m_tracker->cpu_mapped<uint32_t>(gpu_address, &Q);
+
+ if (!Q.m_gem_bo || !surface_state) {
+ return;
+ }
+
+ pfile.begin_block_value("Binding Tables", "%s", label.c_str());
+
+   /* The i965 driver does track (roughly) the number of binding
+    * table entries of each program stage: the value of
+    * X.base.binding_table.size_bytes / 4 is the number of entries
+    * for a stage X, where X is one of brw->wm, brw->vs, brw->gs,
+    * brw->tcs or brw->tes.
+    */
+ if (cnt < 0) {
+ cnt = 16;
+ pfile.print_value("Count", "%d (Guessing)", cnt);
+ } else {
+ pfile.print_value("Count", "%d", cnt);
+ }
+
+ for (int i = 0; i < cnt; ++i) {
+ uint64_t state_gpu_address;
+ const uint32_t *state_ptr;
+ GPUAddressQuery SQ;
+
+ if (v[i] == 0) {
+ continue;
+ }
+
+ pfile.begin_block_value("Binding Table", "#%d", i);
+ pfile.print_value("offset", "%u", v[i]);
+
+ state_gpu_address = v[i] + m_gpu_state.ctx().m_latch_state.m_surface_state_base_address;
+ state_ptr = m_tracker->cpu_mapped<uint32_t>(state_gpu_address, &SQ);
+ if (!SQ.m_gem_bo) {
+ pfile.print_value("GPU address", "0x%012 (BAD)" PRIx64, state_gpu_address);
+ pfile.end_block();
+ continue;
+ }
+
+ pfile.print_value("GPU address", "0x%012 " PRIx64, state_gpu_address);
+ decode_gen_group(pfile, SQ.m_gem_bo, SQ.m_offset_into_gem_bo, state_ptr, surface_state);
+
+ pfile.end_block();
+ }
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_vs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ int cnt;
+ cnt = m_gpu_state.ctx().m_latch_state.m_VS.m_sampler_count;
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[1], cnt);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_gs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ int cnt;
+ cnt = m_gpu_state.ctx().m_latch_state.m_GS.m_sampler_count;
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[1], cnt);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_hs(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ int cnt;
+ cnt = m_gpu_state.ctx().m_latch_state.m_HS.m_sampler_count;
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[1], cnt);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_ds(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ int cnt;
+ cnt = m_gpu_state.ctx().m_latch_state.m_DS.m_sampler_count;
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[1], cnt);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_ps(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ int cnt;
+ cnt = m_gpu_state.ctx().m_latch_state.m_PS.m_sampler_count;
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[1], cnt);
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_gen6(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+   int sampler_counts[3] = {
+ m_gpu_state.ctx().m_latch_state.m_VS.m_sampler_count,
+ m_gpu_state.ctx().m_latch_state.m_GS.m_sampler_count,
+ m_gpu_state.ctx().m_latch_state.m_PS.m_sampler_count
+ };
+
+ for (unsigned int stage = 0; stage < 3; ++stage) {
+ int cnt;
+      cnt = sampler_counts[stage];
+ decode_3dstate_sampler_state_pointers_helper(pfile, data[stage + 1], cnt);
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_sampler_state_pointers_helper(BatchbufferLoggerOutput &pfile,
+ uint32_t offset, int cnt)
+{
+ struct gen_group *g;
+ uint64_t gpu_address;
+
+ g = gen_spec_find_struct(m_spec, "SAMPLER_STATE");
+ pfile.begin_block("SAMPLER_STATEs");
+
+ if (cnt < 0) {
+ cnt = 4;
+ pfile.print_value("Count", "%d (Guessing)", cnt);
+ } else {
+ pfile.print_value("Count", "%d", cnt);
+ }
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + offset;
+ for (int i = 0; i < cnt; ++i) {
+ pfile.begin_block_value("SamplerState", "#%d", i);
+      /* each SAMPLER_STATE is 4 DWORDs (16 bytes) */
+      decode_pointer_helper(pfile, g, gpu_address + i * 16);
+ pfile.end_block();
+ }
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_viewport_state_pointers_cc(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ uint64_t gpu_address;
+ struct gen_group *g;
+
+ g = gen_spec_find_struct(m_spec, "CC_VIEWPORT");
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + (data[1] & ~0x1fu);
+
+ pfile.begin_block("CC_VIEWPORTs");
+
+ uint32_t cnt;
+ if (m_gpu_state.ctx().m_latch_state.m_VIEWPORT_count < 0) {
+ cnt = 4;
+ pfile.print_value("Count", "%d (Guessing)", cnt);
+ } else {
+ cnt = m_gpu_state.ctx().m_latch_state.m_VIEWPORT_count;
+ pfile.print_value("Count", "%d", cnt);
+ }
+
+ for (uint32_t i = 0; i < cnt; ++i) {
+ pfile.begin_block_value("CC-Viewport", "#%d", i);
+ decode_pointer_helper(pfile, g, gpu_address + i * 8);
+ pfile.end_block();
+ }
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_viewport_state_pointers_sf_clip(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ uint64_t gpu_address;
+ struct gen_group *g;
+
+ g = gen_spec_find_struct(m_spec, "SF_CLIP_VIEWPORT");
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + (data[1] & ~0x3fu);
+
+ pfile.begin_block("SF_CLIP_VIEWPORTs");
+
+ uint32_t cnt;
+ if (m_gpu_state.ctx().m_latch_state.m_VIEWPORT_count < 0) {
+ cnt = 4;
+ pfile.print_value("Count", "%d (Guessing)", cnt);
+ } else {
+ cnt = m_gpu_state.ctx().m_latch_state.m_VIEWPORT_count;
+ pfile.print_value("Count", "%d", cnt);
+ }
+
+ for (uint32_t i = 0; i < cnt; ++i) {
+ pfile.begin_block_value("Viewport", "#%d", i);
+ decode_pointer_helper(pfile, g, gpu_address + i * 64);
+ pfile.end_block();
+ }
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_blend_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ uint64_t gpu_address;
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + (data[1] & ~0x3fu);
+ pfile.begin_block("BLEND_STATE");
+ decode_pointer_helper(pfile, "BLEND_STATE", gpu_address);
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_cc_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ uint64_t gpu_address;
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + (data[1] & ~0x3fu);
+ pfile.begin_block("COLOR_CALC_STATE");
+ decode_pointer_helper(pfile, "COLOR_CALC_STATE", gpu_address);
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_3dstate_scissor_state_pointers(BatchbufferLoggerOutput &pfile,
+ const GPUCommand &data)
+{
+ uint64_t gpu_address;
+
+ gpu_address = m_gpu_state.ctx().m_latch_state.m_dynamic_state_base_address + (data[1] & ~0x1fu);
+ pfile.begin_block("SCISSOR_RECT");
+ decode_pointer_helper(pfile, "SCISSOR_RECT", gpu_address);
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_pointer_helper(BatchbufferLoggerOutput &pfile,
+ const char *instruction_name, uint64_t gpu_address)
+{
+ struct gen_group *g;
+
+ g = gen_spec_find_struct(m_spec, instruction_name);
+ if (g) {
+ pfile.print_value("Type", instruction_name);
+ decode_pointer_helper(pfile, g, gpu_address);
+ } else {
+ pfile.print_value("Unknown Type", "%s", instruction_name);
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_pointer_helper(BatchbufferLoggerOutput &pfile,
+ struct gen_group *g, uint64_t gpu_address)
+{
+ const uint32_t *p;
+ GPUAddressQuery Q;
+
+ p = m_tracker->cpu_mapped<uint32_t>(gpu_address, &Q);
+ if (p) {
+ int len;
+ len = gen_group_get_length(g, p);
+
+ if (len < 0) {
+ pfile.print_value("BAD length", "%d", len);
+ return;
+ }
+
+      if (Q.m_offset_into_gem_bo + sizeof(uint32_t) * len > Q.m_gem_bo->size()) {
+         pfile.begin_block("Length too large");
+ pfile.print_value("length", "%d", len);
+ pfile.print_value("GEM BO offset", "%u", Q.m_offset_into_gem_bo);
+ pfile.print_value("GEM BO size", "%u", Q.m_gem_bo->size());
+ pfile.end_block();
+ return;
+ }
+ } else {
+ pfile.print_value("Bad GPU Address", "0x%012" PRIx64, gpu_address);
+ return;
+ }
+
+ decode_gen_group(pfile, Q.m_gem_bo, Q.m_offset_into_gem_bo, p, g);
+}
+
+void
+BatchbufferDecoder::
+decode_gen_group(BatchbufferLoggerOutput &pfile,
+ const GEMBufferObject *q, uint64_t dword_offset,
+ const uint32_t *p, struct gen_group *group)
+{
+ struct gen_field_iterator iter;
+
+ gen_field_iterator_init(&iter, group, p, false);
+
+ while (gen_field_iterator_next(&iter)) {
+ if (!is_header_field(group, iter.field)) {
+ if (iter.struct_desc) {
+ uint64_t struct_offset;
+ struct_offset = dword_offset + iter.dword;
+ pfile.begin_block_value(iter.name, "%s", iter.value);
+ decode_gen_group(pfile, q, struct_offset,
+ p + iter.dword, iter.struct_desc);
+ pfile.end_block();
+ } else {
+ pfile.print_value(iter.name, "%s", iter.value);
+ }
+ }
+ }
+}
+
+void
+BatchbufferDecoder::
+decode_gpu_command(BatchbufferLoggerOutput &pfile, const GPUCommand &q)
+{
+ pfile.begin_block(gen_group_get_name(q.inst()));
+ decode_gen_group(pfile, q.gem_bo(), q.dword_offset(), q.contents_ptr(), q.inst());
+ DetailedDecoder::decode(this, pfile, q);
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+decode_gpu_execute_command(BatchbufferLoggerOutput &pfile, const GPUCommand &q)
+{
+ pfile.begin_block("Execute GPU command");
+ pfile.print_value("Command", "%s", gen_group_get_name(q.inst()));
+
+ decode_gpu_command(pfile, q);
+ pfile.begin_block("GPU State");
+ m_gpu_state.decode_contents(this, q.gpu_pipeline_type(), pfile);
+ pfile.end_block();
+
+ pfile.end_block();
+}
+
+void
+BatchbufferDecoder::
+process_gpu_command(bool printing_enabled, BatchbufferLoggerOutput &pfile,
+ const GPUCommand &q)
+{
+ enum GPUCommand::gpu_command_type_t tp;
+
+ m_gpu_state.update_state(this, pfile, q);
+ tp = q.gpu_command_type();
+ switch (tp) {
+ case GPUCommand::gpu_command_show_value_with_gpu_state:
+ if (printing_enabled) {
+ decode_gpu_execute_command(pfile, q);
+ }
+ break;
+
+ case GPUCommand::gpu_command_show_value_without_gpu_state:
+ if (printing_enabled) {
+ decode_gen_group(pfile, q.gem_bo(), q.dword_offset(), q.contents_ptr(), q.inst());
+ DetailedDecoder::decode(this, pfile, q);
+ }
+ break;
+
+ default:
+ /* nothing */
+ break;
+ }
+}
+
+void
+BatchbufferDecoder::
+absorb_batchbuffer_contents(bool printing_enabled, BatchbufferLoggerOutput &dst,
+ unsigned int start_dword, unsigned int end_dword)
+{
+ if (m_decode_level == no_decode || start_dword >= end_dword) {
+ return;
+ }
+
+ int length;
+
+ for (; start_dword < end_dword; start_dword += length) {
+ GPUCommand q(m_batchbuffer, start_dword, m_spec);
+
+ length = std::max(1u, q.contents_size());
+ if (q.inst()) {
+         if (printing_enabled) {
+            dst.begin_block_value(gen_group_get_name(q.inst()), "%u", start_dword);
+         }
+
+ if (m_decode_level >= instruction_details_decode) {
+ process_gpu_command(printing_enabled, dst, q);
+ }
+
+ if (printing_enabled) {
+ dst.end_block();
+ }
+ } else if (printing_enabled) {
+ dst.begin_block_value("Unknown instruction", "%u (0x%08x)",
+ start_dword, q[0]);
+ dst.end_block();
+ }
+ }
+}
+
+//////////////////////////////////
+// ShaderFileList methods
+const char*
+ShaderFileList::
+filename(const std::string &fileprefix, const void *shader,
+ int pciid, struct gen_disasm *gen_disasm)
+{
+ sha1_value key;
+ std::map<sha1_value, std::string>::iterator iter;
+ int shader_sz;
+
+ shader_sz = gen_disasm_assembly_length(gen_disasm, shader, 0);
+ _mesa_sha1_compute(shader, shader_sz, key.data());
+ iter = m_files.find(key);
+ if (iter != m_files.end()) {
+ return iter->second.c_str();
+ }
+
+ std::ostringstream str;
+ std::string filename;
+
+ str << fileprefix << "-shader_file#" << ++m_count
+ << ".pciid." << pciid << ".bin";
+ filename = str.str();
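+   /* e.g., assuming m_count starts at zero and pciid = 22806, the
+    * first file written is named
+    * "<fileprefix>-shader_file#1.pciid.22806.bin"
+    */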
+
+ std::ofstream shader_file(filename.c_str(),
+ std::ios_base::out | std::ios_base::binary);
+ if (!shader_file.is_open()) {
+ return nullptr;
+ }
+
+ shader_file.write(static_cast<const char*>(shader), shader_sz);
+ iter = m_files.insert(std::make_pair(key, filename)).first;
+
+ return iter->second.c_str();
+}
+
+//////////////////////////////////
+// BatchbufferLog methods
+void
+BatchbufferLog::
+add_ioctl_log_entry(const std::string &entry)
+{
+ if (!m_prints.empty()) {
+ m_prints.back().add_ioctl_log_entry(entry);
+ } else {
+ m_orphan_ioctl_log_entries.push_back(entry);
+ }
+}
+
+
+void
+BatchbufferLog::
+handle_batchbuffer_contents(bool printing_enabled,
+ BatchbufferDecoder *decoder,
+ BatchbufferLoggerOutput &dst,
+ uint32_t start, uint32_t end)
+{
+ if (printing_enabled && dst) {
+ dst.begin_block_value("GPU commands", "[%u, %u)", start, end);
+ dst.print_value("dword start", "%u", start);
+ dst.print_value("dword end", "%u", end);
+ dst.print_value("dword length", "%u", end - start);
+ }
+
+ if (decoder) {
+ decoder->absorb_batchbuffer_contents(printing_enabled, dst,
+ start, end);
+ }
+
+ if (printing_enabled && dst) {
+ dst.end_block();
+ }
+}
+
+bool
+BatchbufferLog::
+emit_log(BatchbufferDecoder *decoder, BatchbufferLoggerOutput &dst,
+ uint32_t batchbuffer_len)
+{
+ uint32_t last_time(0);
+ unsigned int top_level(dst.current_block_level());
+ bool printing_enabled(false);
+
+ for(auto iter = m_prints_from_dummy.begin();
+ iter != m_prints_from_dummy.end(); ++iter) {
+ APIStartCallMarker &entry(*iter);
+ printing_enabled = entry.emit(entry.start_bb_location(), dst,
+ top_level);
+ }
+
+ APIStartCallMarker::print_ioctl_log(m_orphan_ioctl_log_entries, dst);
+
+ for(auto iter = m_prints.begin(); iter != m_prints.end(); ++iter) {
+ APIStartCallMarker &entry(*iter);
+ if (entry.start_bb_location() > last_time) {
+ /* We clear to 1 level, so that the batch-buffer decoding
+ * is a child element of the last APIStartCallMarker block;
+ * note that if m_prints_from_dummy was empty and entry is
+ * the first element in m_prints, then the GPU command decode
+ * block is a child of top_level instead.
+ */
+ dst.clear_block_stack(top_level + 1);
+ handle_batchbuffer_contents(printing_enabled, decoder, dst,
+ last_time, entry.start_bb_location());
+ last_time = entry.start_bb_location();
+ }
+
+ auto next_iter(iter);
+ uint32_t next_time;
+
+ ++next_iter;
+ next_time = (next_iter != m_prints.end()) ?
+ next_iter->start_bb_location() :
+ batchbuffer_len;
+ printing_enabled = entry.emit(next_time, dst, top_level);
+ }
+
+ /* close up all blocks we have left open */
+ if (dst) {
+ dst.clear_block_stack(top_level);
+ }
+
+ if (batchbuffer_len > last_time) {
+ handle_batchbuffer_contents(printing_enabled, decoder, dst,
+ last_time, batchbuffer_len);
+ }
+
+ return printing_enabled;
+}
+
+//////////////////////////////
+// GEMBufferTracker methods
+GEMBufferTracker::
+GEMBufferTracker(int fd):
+ m_fd(fd),
+ m_dummy_hw_ctx(0)
+{}
+
+GEMBufferTracker::
+~GEMBufferTracker()
+{
+ for(const auto &value : m_gem_bos_by_handle) {
+ delete value.second;
+ }
+}
+
+void
+GEMBufferTracker::
+emit_unemitted_log(BatchbufferLoggerOutput &dst)
+{
+ bool has_stuff_to_emit(false);
+
+ for (const auto &v : m_logs) {
+ if (!v.second.empty()) {
+ has_stuff_to_emit = true;
+ break;
+ }
+ }
+
+ if (!has_stuff_to_emit)
+ return;
+
+ dst.begin_block("UnemittedBatchbuffer");
+ dst.print_value("fd", "%d", m_fd);
+ for (auto &v : m_logs) {
+ if (!v.second.empty()) {
+ dst.begin_block_value("gem_bo", "%u", v.second.src()->gem_bo);
+ v.second.emit_log(nullptr, dst, 0);
+ dst.end_block();
+ }
+ }
+ dst.end_block();
+}
+
+void
+GEMBufferTracker::
+add_gem_bo(const struct drm_i915_gem_create &pdata)
+{
+ GEMBufferObject *p;
+ p = new GEMBufferObject(m_fd, pdata);
+ m_gem_bos_by_handle[pdata.handle] = p;
+}
+
+void
+GEMBufferTracker::
+add_gem_bo(const struct drm_i915_gem_userptr &pdata)
+{
+ GEMBufferObject *p;
+ p = new GEMBufferObject(m_fd, pdata);
+ m_gem_bos_by_handle[pdata.handle] = p;
+}
+
+void
+GEMBufferTracker::
+remove_gem_bo(uint32_t h)
+{
+ std::map<uint32_t, GEMBufferObject*>::const_iterator iter;
+ GEMBufferObject *p;
+
+ iter = m_gem_bos_by_handle.find(h);
+ if (iter != m_gem_bos_by_handle.end()) {
+ p = iter->second;
+ m_gem_bos_by_handle.erase(iter);
+ m_gem_bos_by_gpu_address_end.erase(p->gpu_address_end());
+ delete p;
+ }
+}
+
+GEMBufferObject*
+GEMBufferTracker::
+fetch_gem_bo(uint32_t h) const
+{
+ std::map<uint32_t, GEMBufferObject*>::const_iterator iter;
+ iter = m_gem_bos_by_handle.find(h);
+ return (iter != m_gem_bos_by_handle.end()) ?
+ iter->second :
+ nullptr;
+}
+
+void
+GEMBufferTracker::
+add_hw_context(const struct drm_i915_gem_context_create &create)
+{
+ uint32_t h;
+ h = create.ctx_id;
+ m_hw_contexts.insert(std::make_pair(h, i965HWContextData(h)));
+}
+
+void
+GEMBufferTracker::
+remove_hw_context(const struct drm_i915_gem_context_destroy &destroy)
+{
+ uint32_t h;
+ h = destroy.ctx_id;
+ m_hw_contexts.erase(h);
+}
+
+i965HWContextData*
+GEMBufferTracker::
+fetch_hw_context(uint32_t h)
+{
+ std::map<uint32_t, i965HWContextData>::iterator iter;
+ iter = m_hw_contexts.find(h);
+ if (iter != m_hw_contexts.end()) {
+ return &iter->second;
+ } else {
+ m_dummy_hw_ctx = i965HWContextData(0);
+ return &m_dummy_hw_ctx;
+ }
+}
+
+std::pair<bool, GEMBufferObject*>
+GEMBufferTracker::
+update_gem_bo_gpu_address(const struct drm_i915_gem_exec_object2 *p)
+{
+ std::map<uint32_t, GEMBufferObject*>::const_iterator iter;
+
+ iter = m_gem_bos_by_handle.find(p->handle);
+ if (iter == m_gem_bos_by_handle.end()) {
+ return std::make_pair(false, nullptr);
+ }
+
+ uint64_t old_gpu_address;
+ old_gpu_address = iter->second->gpu_address_begin();
+ if (old_gpu_address != p->offset) {
+ /* remove from m_gem_bos_by_gpu_address_end
+ * before updating
+ */
+ m_gem_bos_by_gpu_address_end.erase(iter->second->gpu_address_end());
+
+ /* Update GPU address of GEM BO */
+ iter->second->update_gpu_address(p->offset);
+
+ /* Place GEM BO into m_gem_bos_by_gpu_address_end */
+ uint64_t key;
+ key = iter->second->gpu_address_end();
+ m_gem_bos_by_gpu_address_end[key] = iter->second;
+ return std::make_pair(true, iter->second);
+ }
+
+ return std::make_pair(false, iter->second);
+}
+
+GPUAddressQuery
+GEMBufferTracker::
+get_memory_at_gpu_address(uint64_t address) const
+{
+ std::map<uint64_t, GEMBufferObject*>::const_iterator iter;
+ GPUAddressQuery return_value;
+
+ /* Get the first BO whose GPU end address is
+ * greater than address, thus iter->first > address
+ */
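+   /* Illustration: with BOs A = [0x1000, 0x2000) and
+    * B = [0x3000, 0x4000) keyed by end address, a query for
+    * 0x3500 finds upper_bound = 0x4000 (BO B) and
+    * 0x3500 >= 0x3000, so it hits B; a query for 0x2500 also
+    * finds B, but 0x2500 < 0x3000, so it (correctly) misses.
+    */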
+ iter = m_gem_bos_by_gpu_address_end.upper_bound(address);
+ if (iter != m_gem_bos_by_gpu_address_end.end()
+ && iter->second->gpu_address_begin() <= address) {
+ return_value.m_gem_bo = iter->second;
+ return_value.m_offset_into_gem_bo =
+ address - iter->second->gpu_address_begin();
+ } else {
+ return_value.m_gem_bo = nullptr;
+ return_value.m_offset_into_gem_bo = 0uL;
+ }
+ return return_value;
+}
+
+template<typename T>
+const T*
+GEMBufferTracker::
+cpu_mapped(uint64_t gpu_address, GPUAddressQuery *q)
+{
+ GPUAddressQuery Q;
+
+ q = (q) ? q : &Q;
+ *q = get_memory_at_gpu_address(gpu_address);
+ if (q->m_gem_bo) {
+ const void *p;
+ p = q->m_gem_bo->cpu_mapped<uint8_t>() + q->m_offset_into_gem_bo;
+ return static_cast<const T*>(p);
+ } else {
+ return nullptr;
+ }
+}
+
+int
+GEMBufferTracker::
+pread_buffer(void *dst, uint64_t gpu_address, uint64_t size) const
+{
+ GPUAddressQuery q;
+ q = get_memory_at_gpu_address(gpu_address);
+
+   if (q.m_gem_bo
+       && q.m_gem_bo->gpu_address_begin() <= gpu_address
+       && gpu_address + size <= q.m_gem_bo->gpu_address_end()) {
+      return q.m_gem_bo->pread_buffer(dst, q.m_offset_into_gem_bo, size);
+ } else {
+ return -1;
+ }
+}
+
+
+BatchbufferLog*
+GEMBufferTracker::
+fetch(uint32_t gem_handle)
+{
+ std::map<uint32_t, BatchbufferLog>::iterator iter;
+ iter = m_logs.find(gem_handle);
+ return (iter != m_logs.end()) ?
+ &iter->second:
+ nullptr;
+}
+
+BatchbufferLog*
+GEMBufferTracker::
+fetch_or_create(const void *bb, uint32_t h)
+{
+ BatchbufferLog *b;
+ b = fetch(h);
+
+ if (b == nullptr) {
+ std::map<uint32_t, BatchbufferLog>::iterator iter;
+ BatchbufferLog m(m_fd, bb, h);
+
+ iter = m_logs.insert(std::make_pair(h, m)).first;
+ b = &iter->second;
+ }
+
+ return b;
+}
+
+void
+GEMBufferTracker::
+remove_batchbuffer_log(const BatchbufferLog *q)
+{
+ assert(q != nullptr);
+ assert(q == fetch(q->src()->gem_bo));
+ m_logs.erase(q->src()->gem_bo);
+}
+
+///////////////////////////////
+// BatchbufferLogger methods
+BatchbufferLogger::
+BatchbufferLogger(void):
+ m_start_log_call_number(read_from_environment<unsigned int>("i965_INSTR_START", 0)),
+ m_end_log_call_number(read_from_environment<unsigned int>("i965_INSTR_END", ~0u)),
+ m_max_file_size(read_from_environment<unsigned int>("i965_INSTR_FILE_SIZE", 1 << 28)),
+ m_batchbuffer_state(default_batchbuffer_state_fcn),
+ m_active_batchbuffer(default_active_batchbuffer_fcn),
+ m_gen_spec(nullptr),
+ m_gen_disasm(nullptr),
+ m_dummy(-1, nullptr, ~0u),
+ m_number_aborted_batchbuffers(0)
+{
+ aborted_batchbuffer = aborted_batchbuffer_fcn;
+ release_driver = release_driver_fcn;
+
+ pre_call = pre_call_fcn;
+ post_call = post_call_fcn;
+ begin_logging = begin_logging_fcn;
+ end_logging = end_logging_fcn;
+ release_app = release_app_fcn;
+
+ std::string decode_level_str;
+ decode_level_str =
+ read_from_environment<std::string>("I965_DECODE_LEVEL",
+ "instruction_details_decode");
+
+ if (decode_level_str == "no_decode") {
+ m_decode_level = BatchbufferDecoder::no_decode;
+ } else if (decode_level_str == "instruction_decode") {
+ m_decode_level = BatchbufferDecoder::instruction_decode;
+ } else {
+ m_decode_level = BatchbufferDecoder::instruction_details_decode;
+ }
+
+ decode_level_str =
+ read_from_environment<std::string>("I965_PRINT_RELOC_LEVEL",
+ "print_reloc_nothing");
+ if (decode_level_str == "print_reloc_gem_gpu_updates") {
+ m_print_reloc_level = BatchbufferDecoder::print_reloc_gem_gpu_updates;
+ } else {
+ m_print_reloc_level = BatchbufferDecoder::print_reloc_nothing;
+ }
+
+ m_shader_decode_flags = 0u;
+ if (read_from_environment<int>("I965_DECODE_VS", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_vs);
+ }
+ if (read_from_environment<int>("I965_DECODE_HS", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_hs);
+ }
+ if (read_from_environment<int>("I965_DECODE_DS", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_ds);
+ }
+ if (read_from_environment<int>("I965_DECODE_GS", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_gs);
+ }
+ if (read_from_environment<int>("I965_DECODE_PS8", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_ps_8);
+ }
+ if (read_from_environment<int>("I965_DECODE_PS16", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_ps_16);
+ }
+ if (read_from_environment<int>("I965_DECODE_PS32", 1)) {
+ m_shader_decode_flags |= (1u << BatchbufferDecoder::shader_decode_ps_32);
+ }
+ if (read_from_environment<int>("I965_DECODE_CS", 1)) {
+ m_shader_decode_flags |=
+ (1u << BatchbufferDecoder::shader_decode_media_compute);
+ }
+}
+
+BatchbufferLogger::
+~BatchbufferLogger()
+{
+ for (const auto &v : m_gem_buffer_trackers) {
+ v.second->emit_unemitted_log(m_file);
+ delete v.second;
+ }
+
+ if (!m_dummy.empty() && m_file) {
+ m_file.begin_block("Logs not associated to batchbuffer");
+ m_dummy.emit_log(nullptr, m_file, 0);
+ m_file.end_block();
+ }
+
+ if (m_gen_disasm) {
+ gen_disasm_destroy(m_gen_disasm);
+ }
+}
+
+void
+BatchbufferLogger::
+aborted_batchbuffer_fcn(struct i965_batchbuffer_logger *pthis,
+ int fd, uint32_t gem_bo)
+{
+ BatchbufferLogger *R;
+ R = static_cast<BatchbufferLogger*>(pthis);
+
+ R->m_mutex.lock();
+
+ BatchbufferLog *bb;
+ bb = R->fetch_batchbuffer_log(fd, gem_bo);
+ if (bb) {
+ if(R->m_file && !bb->empty()) {
+ ++R->m_number_aborted_batchbuffers;
+ R->m_file.begin_block_value("Aborted batchbuffer", "#%d",
+ R->m_number_aborted_batchbuffers);
+ R->m_file.print_value("fd", "%d", bb->src()->fd);
+ R->m_file.print_value("gem_bo", "%u", bb->src()->gem_bo);
+ bb->emit_log(nullptr, R->m_file, 0);
+ R->m_file.end_block();
+ }
+ R->gem_buffer_tracker(bb->src()->fd)->remove_batchbuffer_log(bb);
+ }
+
+ R->m_mutex.unlock();
+}
+
+void
+BatchbufferLogger::
+release_driver_fcn(struct i965_batchbuffer_logger *pthis)
+{
+ release();
+}
+
+void
+BatchbufferLogger::
+pre_call_fcn(struct i965_batchbuffer_logger_app *pthis,
+ unsigned int call_id,
+ const char *call_detailed,
+ const char *fcn_name)
+{
+ BatchbufferLogger *R;
+ BatchbufferLog *bb;
+ uint32_t time_of_print(0);
+
+ R = static_cast<BatchbufferLogger*>(pthis);
+
+ R->m_mutex.lock();
+ bb = R->fetch_or_create_batchbuffer_log();
+ if (bb != &R->m_dummy) {
+ time_of_print = R->m_batchbuffer_state(bb->src());
+ }
+ bb->add_call_marker(R->m_file, R->m_dummy, call_id, fcn_name,
+ call_detailed, time_of_print);
+ R->m_mutex.unlock();
+}
+
+void
+BatchbufferLogger::
+post_call_fcn(struct i965_batchbuffer_logger_app *pthis,
+ unsigned int call_id)
+{
+}
+
+void
+BatchbufferLogger::
+begin_logging_fcn(struct i965_batchbuffer_logger_app *pthis,
+ const char *name)
+{
+ BatchbufferLogger *R;
+ R = static_cast<BatchbufferLogger*>(pthis);
+
+ R->m_mutex.lock();
+ R->m_file.open(name);
+ R->m_shader_filelist.clear();
+ R->m_mutex.unlock();
+}
+
+void
+BatchbufferLogger::
+end_logging_fcn(struct i965_batchbuffer_logger_app *pthis)
+{
+ BatchbufferLogger *R;
+ R = static_cast<BatchbufferLogger*>(pthis);
+
+ R->m_mutex.lock();
+ /* We need to emit all the data of batchbuffers with a log */
+ for(const auto &v: R->m_gem_buffer_trackers) {
+ v.second->emit_unemitted_log(R->m_file);
+ }
+ R->m_file.close();
+ R->m_mutex.unlock();
+}
+
+void
+BatchbufferLogger::
+release_app_fcn(struct i965_batchbuffer_logger_app *pthis)
+{
+ release();
+}
+
+GEMBufferTracker*
+BatchbufferLogger::
+gem_buffer_tracker(int fd)
+{
+ GEMBufferTracker *q;
+ std::map<int, GEMBufferTracker*>::iterator iter;
+
+ iter = m_gem_buffer_trackers.find(fd);
+ if (iter != m_gem_buffer_trackers.end()) {
+ q = iter->second;
+ } else {
+ q = new GEMBufferTracker(fd);
+ m_gem_buffer_trackers[fd] = q;
+ }
+
+ return q;
+}
+
+void
+BatchbufferLogger::
+pre_process_ioctl(int fd, unsigned long request, void *argp)
+{
+ m_mutex.lock();
+}
+
+void
+BatchbufferLogger::
+post_process_ioctl(int ioctl_return_code, int fd, unsigned long request,
+ void *argp)
+{
+ if (ioctl_return_code == -1) {
+ m_mutex.unlock();
+ return;
+ }
+
+ GEMBufferTracker *tracker;
+ BatchbufferLog *bb;
+ struct i965_logged_batchbuffer driver_bb;
+
+ tracker = gem_buffer_tracker(fd);
+ m_active_batchbuffer(&driver_bb);
+ if (driver_bb.fd == fd) {
+ bb = tracker->fetch_or_create(driver_bb.driver_data,
+ driver_bb.gem_bo);
+ } else {
+ bb = &m_dummy;
+ }
+
+ switch(request) {
+ case DRM_IOCTL_I915_GEM_CREATE: {
+ struct drm_i915_gem_create *create;
+
+ create = (struct drm_i915_gem_create*) argp;
+ tracker->add_gem_bo(*create);
+
+ std::ostringstream ostr;
+ ostr << "Create GEM BO fd = " << std::dec << fd
+ << ", size = " << create->size
+ << ", handle = " << create->handle;
+ bb->add_ioctl_log_entry(ostr.str());
+ break;
+ }
+
+ case DRM_IOCTL_I915_GEM_USERPTR: {
+ struct drm_i915_gem_userptr *create;
+
+ create = (struct drm_i915_gem_userptr*) argp;
+ tracker->add_gem_bo(*create);
+
+ std::ostringstream ostr;
+ ostr << "Create GEM BO-userptr fd = " << std::dec << fd
+ << ", user_size = " << create->user_size
+ << ", user_ptr = " << create->user_ptr
+ << ", handle = " << create->handle;
+ bb->add_ioctl_log_entry(ostr.str());
+ break;
+ }
+
+ case DRM_IOCTL_GEM_CLOSE: {
+ struct drm_gem_close *cmd;
+ std::ostringstream str;
+
+ cmd = (struct drm_gem_close *) argp;
+ tracker->remove_gem_bo(cmd->handle);
+
+ str << "Remove GEM BO fd = " << fd
+ << ", handle = " << cmd->handle;
+ bb->add_ioctl_log_entry(str.str());
+ break;
+ }
+
+ case DRM_IOCTL_I915_GEM_CONTEXT_CREATE: {
+ struct drm_i915_gem_context_create *create_hw_ctx;
+
+ create_hw_ctx = (struct drm_i915_gem_context_create*)argp;
+ tracker->add_hw_context(*create_hw_ctx);
+
+ std::ostringstream ostr;
+ ostr << "Create GEM HW context, fd = " << std::dec << fd
+ << ", handle = " << create_hw_ctx->ctx_id;
+ bb->add_ioctl_log_entry(ostr.str());
+ break;
+ }
+
+ case DRM_IOCTL_I915_GEM_CONTEXT_DESTROY: {
+ struct drm_i915_gem_context_destroy *destroy_hw_ctx;
+
+ destroy_hw_ctx = (struct drm_i915_gem_context_destroy*)argp;
+ tracker->remove_hw_context(*destroy_hw_ctx);
+
+ std::ostringstream ostr;
+ ostr << "Destroy GEM HW context, fd = " << std::dec << fd
+ << ", handle = " << destroy_hw_ctx->ctx_id;
+ bb->add_ioctl_log_entry(ostr.str());
+ break;
+ }
+
+ case DRM_IOCTL_I915_GEM_EXECBUFFER: {
+ //TODO:
+ break;
+ }
+
+ case DRM_IOCTL_I915_GEM_EXECBUFFER2:
+ case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: {
+ struct drm_i915_gem_execbuffer2 *execbuffer2 =
+ (struct drm_i915_gem_execbuffer2*) argp;
+ BatchbufferDecoder decoder(m_decode_level, m_print_reloc_level,
+ m_shader_decode_flags,
+ m_gen_spec, m_gen_disasm,
+ m_pci_id, tracker, &m_shader_filelist,
+ execbuffer2);
+
+ assert(decoder.batchbuffer_log());
+ decoder.emit_log(m_file);
+ tracker->remove_batchbuffer_log(decoder.batchbuffer_log());
+ break;
+ }
+
+ } //of switch(request)
+
+ m_mutex.unlock();
+}
+
+int
+BatchbufferLogger::
+local_drm_ioctl(int fd, unsigned long request, void *argp)
+{
+ int ret;
+
+ do {
+ ret = ioctl(fd, request, argp);
+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+ return ret;
+}
+
+static pthread_mutex_t i965_batchbuffer_logger_acquire_mutex =
+ PTHREAD_MUTEX_INITIALIZER;
+static int i965_batchbuffer_logger_acquire_ref_count = 0;
+static BatchbufferLogger *i965_batchbuffer_logger_object = nullptr;
+
+BatchbufferLogger*
+BatchbufferLogger::
+acquire(void)
+{
+ pthread_mutex_lock(&i965_batchbuffer_logger_acquire_mutex);
+
+ if (!i965_batchbuffer_logger_object) {
+ i965_batchbuffer_logger_object = new BatchbufferLogger();
+ }
+ ++i965_batchbuffer_logger_acquire_ref_count;
+
+ pthread_mutex_unlock(&i965_batchbuffer_logger_acquire_mutex);
+
+ return i965_batchbuffer_logger_object;
+}
+
+void
+BatchbufferLogger::
+release(void)
+{
+ pthread_mutex_lock(&i965_batchbuffer_logger_acquire_mutex);
+
+ --i965_batchbuffer_logger_acquire_ref_count;
+ if (i965_batchbuffer_logger_acquire_ref_count == 0) {
+ delete i965_batchbuffer_logger_object;
+ i965_batchbuffer_logger_object = nullptr;
+ }
+
+ pthread_mutex_unlock(&i965_batchbuffer_logger_acquire_mutex);
+}
+
+void
+BatchbufferLogger::
+set_driver_funcs(int pci_id,
+ i965_logged_batchbuffer_state f1,
+ i965_active_batchbuffer f2)
+{
+ int old_pci_id;
+
+ m_mutex.lock();
+ old_pci_id = m_pci_id;
+ m_batchbuffer_state = f1;
+ m_active_batchbuffer = f2;
+ m_pci_id = pci_id;
+ gen_get_device_info(m_pci_id, &m_dev_info);
+ m_gen_spec = gen_spec_load(&m_dev_info);
+
+ if (m_gen_disasm && old_pci_id != m_pci_id) {
+ gen_disasm_destroy(m_gen_disasm);
+ m_gen_disasm = nullptr;
+ }
+
+ if (m_gen_disasm == nullptr) {
+ m_gen_disasm = gen_disasm_create(m_pci_id);
+ }
+
+ m_mutex.unlock();
+}
+
+/* Replacing ioctl, as the aubdump tool of IGT does, does not
+ * work with apitrace; some of the ioctls are picked up, but not
+ * all. This appears to happen only under apitrace (and its
+ * glretrace program). I have no idea why replacing ioctl does
+ * not work, but replacing drmIoctl does.
+ */
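+/* A sketch of how the interposer is expected to be loaded (the
+ * exact library path is an assumption, not part of this patch):
+ * preload the logger so that its drmIoctl overrides the one in
+ * libdrm, e.g.
+ *
+ *   LD_PRELOAD=/path/to/libi965_batchbuffer_logger.so glretrace app.trace
+ */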
+extern "C"
+int
+drmIoctl(int fd, unsigned long request, void *arg)
+{
+ int return_value;
+
+ pthread_mutex_lock(&i965_batchbuffer_logger_acquire_mutex);
+
+ if (i965_batchbuffer_logger_object) {
+ i965_batchbuffer_logger_object->pre_process_ioctl(fd, request, arg);
+ }
+
+ return_value = BatchbufferLogger::local_drm_ioctl(fd, request, arg);
+
+ if (i965_batchbuffer_logger_object) {
+ i965_batchbuffer_logger_object->post_process_ioctl(return_value, fd,
+ request, arg);
+ }
+
+ pthread_mutex_unlock(&i965_batchbuffer_logger_acquire_mutex);
+
+ return return_value;
+}
+
+//////////////////////////////////////////
+// exported symbols for application integration
+extern "C"
+struct i965_batchbuffer_logger_app*
+i965_batchbuffer_logger_app_acquire(void)
+{
+ BatchbufferLogger *R;
+ R = BatchbufferLogger::acquire();
+ return R;
+}
+
+///////////////////////////////////////////
+// exported symbols for 3D driver integration
+extern "C"
+struct i965_batchbuffer_logger*
+i965_batchbuffer_logger_acquire(int pci_id,
+ i965_logged_batchbuffer_state f1,
+ i965_active_batchbuffer f2)
+{
+ BatchbufferLogger *R;
+ R = BatchbufferLogger::acquire();
+ R->set_driver_funcs(pci_id, f1, f2);
+ return R;
+}
diff --git a/src/intel/tools/i965_batchbuffer_logger_instructions.h b/src/intel/tools/i965_batchbuffer_logger_instructions.h
new file mode 100644
index 0000000..ad3385d
--- /dev/null
+++ b/src/intel/tools/i965_batchbuffer_logger_instructions.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef I965_BATCHBUFFER_LOGGER_INSTRUCTIONS_H
+#define I965_BATCHBUFFER_LOGGER_INSTRUCTIONS_H
+
+#define STATE_BASE_ADDRESS 0x61010000
+
+#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x70020000
+#define MEDIA_CURBE_LOAD 0x70010000
+#define MEDIA_VFE_STATE 0x70000000
+#define MEDIA_STATE_FLUSH 0x70040000
+
+#define _3DSTATE_PIPELINE_SELECT 0x61040000
+#define _3DSTATE_PIPELINE_SELECT_GM45 0x69040000
+
+#define _3DSTATE_INDEX_BUFFER 0x780a0000
+#define _3DSTATE_VERTEX_BUFFERS 0x78080000
+
+#define _3DSTATE_VF_INSTANCING 0x78490000
+
+#define _3DSTATE_VS 0x78100000
+#define _3DSTATE_GS 0x78110000
+#define _3DSTATE_HS 0x781b0000
+#define _3DSTATE_DS 0x781d0000
+#define _3DSTATE_PS 0x78200000
+
+#define _3D_STATE_CLIP 0x78120000
+
+#define _3DSTATE_CONSTANT_VS 0x78150000
+#define _3DSTATE_CONSTANT_GS 0x78160000
+#define _3DSTATE_CONSTANT_PS 0x78170000
+#define _3DSTATE_CONSTANT_HS 0x78190000
+#define _3DSTATE_CONSTANT_DS 0x781A0000
+
+#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x78260000
+#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x78270000
+#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x78280000
+#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x78290000
+#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782a0000
+
+#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782b0000
+#define _3DSTATE_SAMPLER_STATE_POINTERS_DS 0x782c0000
+#define _3DSTATE_SAMPLER_STATE_POINTERS_HS 0x782d0000
+#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782e0000
+#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782f0000
+#define _3DSTATE_SAMPLER_STATE_POINTERS 0x78020000
+
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x78230000
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP 0x78210000
+#define _3DSTATE_BLEND_STATE_POINTERS 0x78240000
+#define _3DSTATE_CC_STATE_POINTERS 0x780e0000
+#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f0000
+
+#define _MI_CMD_3D (0x3 << 29)
+#define _3DSTATE_PIPE_CONTROL (_MI_CMD_3D | (3 << 27) | (2 << 24))
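+/* i.e. 0x7a000000: command type 3 in bits 31:29, sub-type 3 in
+ * bits 28:27, opcode 2 in bits 26:24.
+ */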
+
+#define _3DPRIMITIVE 0x7b000000
+#define _GPGPU_WALKER 0x71050000
+
+#define _MI_CMD (0x0 << 29)
+
+/* MI commands that set register values which we can (mostly)
+ * determine after the kernel returns from the ioctl.
+ */
+#define _MI_LOAD_REGISTER_IMM (_MI_CMD | (34 << 23))
+#define _MI_LOAD_REGISTER_REG (_MI_CMD | (42 << 23))
+#define _MI_LOAD_REGISTER_MEM (_MI_CMD | (41 << 23))
+#define _MI_STORE_REGISTER_MEM (_MI_CMD | (36 << 23))
+
+/* MI commands proper (as opposed to the register setters above);
+ * not all of these are allowed in an execlist.
+ */
+#define _MI_NOOP (_MI_CMD | ( 0 << 23))
+#define _MI_BATCH_BUFFER_END (_MI_CMD | (10 << 23))
+#define _MI_BATCH_BUFFER_START (_MI_CMD | (49 << 23))
+#define _MI_ARB_CHECK (_MI_CMD | ( 5 << 23))
+#define _MI_ATOMIC (_MI_CMD | (47 << 23))
+#define _MI_CLFLUSH (_MI_CMD | (39 << 23))
+#define _MI_CONDITIONAL_BATCH_BUFFER_END (_MI_CMD | (54 << 23))
+#define _MI_COPY_MEM_MEM (_MI_CMD | (46 << 23))
+#define _MI_DISPLAY_FLIP (_MI_CMD | (20 << 23))
+#define _MI_FORCE_WAKEUP (_MI_CMD | (29 << 23))
+#define _MI_LOAD_SCAN_LINES_EXCL (_MI_CMD | (19 << 23))
+#define _MI_LOAD_SCAN_LINES_INCL (_MI_CMD | (18 << 23))
+#define _MI_MATH (_MI_CMD | (26 << 23))
+#define _MI_REPORT_HEAD (_MI_CMD | ( 7 << 23))
+#define _MI_REPORT_PERF_COUNT (_MI_CMD | (40 << 23))
+#define _MI_RS_CONTEXT (_MI_CMD | (15 << 23))
+#define _MI_RS_CONTROL (_MI_CMD | ( 6 << 23))
+#define _MI_RS_STORE_DATA_IMM (_MI_CMD | (43 << 23))
+#define _MI_SEMAPHORE_SIGNAL (_MI_CMD | (27 << 23))
+#define _MI_SEMAPHORE_WAIT (_MI_CMD | (28 << 23))
+#define _MI_SET_CONTEXT (_MI_CMD | (24 << 23))
+#define _MI_STORE_DATA_IMM (_MI_CMD | (32 << 23))
+#define _MI_STORE_DATA_INDEX (_MI_CMD | (33 << 23))
+#define _MI_SUSPEND_FLUSH (_MI_CMD | (11 << 23))
+#define _MI_UPDATE_GTT (_MI_CMD | (35 << 23))
+#define _MI_USER_INTERRUPT (_MI_CMD | ( 2 << 23))
+#define _MI_WAIT_FOR_EVENT (_MI_CMD | ( 3 << 23))
+/* Setting the predicate, directly or via registers, is viewed
+ * as a command and not as state because the value to which it
+ * is set is not entirely determined by CPU-side values.
+ */
+#define _MI_SET_PREDICATE (_MI_CMD | ( 1 << 23))
+#define _MI_PREDICATE (_MI_CMD | (12 << 23))
+
+/* MI commands that set a state value */
+#define _MI_TOPOLOGY_FILTER (_MI_CMD | (13 << 23))
+
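+/* Illustrative sketch (a hypothetical helper, not used by the
+ * logger itself): an MI command carries its type in bits 31:29
+ * (zero for MI) and its opcode in bits 28:23 of the first dword,
+ * so a dword can be matched against the _MI_ values above by
+ * masking bits 31:23, e.g.
+ * mi_opcode_matches(dword, _MI_BATCH_BUFFER_END).
+ */
+static inline int
+mi_opcode_matches(unsigned dword, unsigned mi_op)
+{
+   return (dword & 0xff800000u) == mi_op;
+}
+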
+#endif
--
2.7.4