[Mesa-dev] [PATCH 10/14] swr: [rasterizer archrast] add events
Tim Rowley
timothy.o.rowley at intel.com
Thu Nov 10 03:18:44 UTC 2016
Add ArchRast events for tracking early/late depth and stencil test results,
TE patch info, GS prim info, and frontend/backend draw-end events.
---
.../drivers/swr/rasterizer/archrast/archrast.cpp | 279 ++++++++++++++++++++-
.../drivers/swr/rasterizer/archrast/events.proto | 255 +++++++++++++++++++
.../drivers/swr/rasterizer/core/backend.cpp | 7 +
.../drivers/swr/rasterizer/core/frontend.cpp | 3 +-
.../drivers/swr/rasterizer/core/threads.cpp | 1 +
.../scripts/templates/ar_eventhandler_h.template | 2 +-
6 files changed, 541 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 16b6d33..5bb1c7b 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -35,16 +35,289 @@
namespace ArchRast
{
//////////////////////////////////////////////////////////////////////////
+ /// @brief Depth/stencil test tally for one backend; every counter starts at zero.
+ struct DepthStencilStats
+ {
+     uint32_t earlyZTestPassCount{0};        ///< early depth test: lanes that passed
+     uint32_t earlyZTestFailCount{0};        ///< early depth test: covered lanes that failed
+     uint32_t lateZTestPassCount{0};         ///< late depth test: lanes that passed
+     uint32_t lateZTestFailCount{0};         ///< late depth test: covered lanes that failed
+     uint32_t earlyStencilTestPassCount{0};  ///< early stencil test: lanes that passed
+     uint32_t earlyStencilTestFailCount{0};  ///< early stencil test: covered lanes that failed
+     uint32_t lateStencilTestPassCount{0};   ///< late stencil test: lanes that passed
+     uint32_t lateStencilTestFailCount{0};   ///< late stencil test: covered lanes that failed
+     uint32_t earlyZTestCount{0};            ///< early depth tests performed (pass + fail)
+     uint32_t lateZTestCount{0};             ///< late depth tests performed (pass + fail)
+     uint32_t earlyStencilTestCount{0};      ///< early stencil tests performed (pass + fail)
+     uint32_t lateStencilTestCount{0};       ///< late stencil tests performed (pass + fail)
+ };
+
+ struct CStats
+ {
+     uint32_t clippedVerts{0};  ///< clipper tally: vertices counted from ClipVertexCount events
+ };
+
+ struct TEStats
+ {
+     uint32_t inputPrims{0};  ///< tessellation tally: sum of TessPrimCount::primCount
+     //@todo: Change this to numPatches. Assumes one patch per prim; if that holds, it's fine.
+ };
+
+ struct GSStats
+ {
+     uint32_t inputPrimCount = 0;      ///< geometry shader tally: primitives consumed (zero-initialised like the sibling stats structs)
+     uint32_t primGeneratedCount = 0;  ///< geometry shader tally: primitives produced
+     uint32_t vertsInput = 0;          ///< geometry shader tally: vertices consumed
+ };
+
+ //////////////////////////////////////////////////////////////////////////
/// @brief Event handler that saves stat events to event files. This
/// handler filters out unwanted events.
class EventHandlerStatsFile : public EventHandlerFile
{
public:
+ DepthStencilStats DSSingleSample{};  ///< tally fed by the *SingleSample depth/stencil events
+ DepthStencilStats DSSampleRate{};    ///< tally fed by the *SampleRate depth/stencil events
+ DepthStencilStats DSPixelRate{};     ///< tally fed by the *PixelRate depth events
+ DepthStencilStats DSNullPS{};        ///< tally fed by the *NullPS depth/stencil events
+ DepthStencilStats DSOmZ{};           ///< combined tally that every depth/stencil handler also adds to
+ CStats CS{};                         ///< clipper tally for the current draw
+ TEStats TS{};                        ///< tessellation tally for the current draw
+ GSStats GS{};                        ///< geometry shader tally for the current draw
+
EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
// These are events that we're not interested in saving in stats event files.
virtual void Handle(Start& event) {}
virtual void Handle(End& event) {}
+
+ virtual void Handle(EarlyDepthStencilInfoSingleSample& event)
+ {
+     // Count one single-sample early Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSSingleSample's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSSingleSample, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->earlyZTestPassCount += zPass;
+         pTally->earlyZTestFailCount += zFail;
+         pTally->earlyZTestCount += zPass + zFail;
+         pTally->earlyStencilTestPassCount += stencilPass;
+         pTally->earlyStencilTestFailCount += stencilFail;
+         pTally->earlyStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(EarlyDepthStencilInfoSampleRate& event)
+ {
+     // Count one sample-rate early Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSSampleRate's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSSampleRate, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->earlyZTestPassCount += zPass;
+         pTally->earlyZTestFailCount += zFail;
+         pTally->earlyZTestCount += zPass + zFail;
+         pTally->earlyStencilTestPassCount += stencilPass;
+         pTally->earlyStencilTestFailCount += stencilFail;
+         pTally->earlyStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(EarlyDepthStencilInfoNullPS& event)
+ {
+     // Count one NullPS early Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSNullPS's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSNullPS, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->earlyZTestPassCount += zPass;
+         pTally->earlyZTestFailCount += zFail;
+         pTally->earlyZTestCount += zPass + zFail;
+         pTally->earlyStencilTestPassCount += stencilPass;
+         pTally->earlyStencilTestFailCount += stencilFail;
+         pTally->earlyStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(LateDepthStencilInfoSingleSample& event)
+ {
+     // Count one single-sample late Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSSingleSample's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSSingleSample, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->lateZTestPassCount += zPass;
+         pTally->lateZTestFailCount += zFail;
+         pTally->lateZTestCount += zPass + zFail;
+         pTally->lateStencilTestPassCount += stencilPass;
+         pTally->lateStencilTestFailCount += stencilFail;
+         pTally->lateStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(LateDepthStencilInfoSampleRate& event)
+ {
+     // Count one sample-rate late Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSSampleRate's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSSampleRate, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->lateZTestPassCount += zPass;
+         pTally->lateZTestFailCount += zFail;
+         pTally->lateZTestCount += zPass + zFail;
+         pTally->lateStencilTestPassCount += stencilPass;
+         pTally->lateStencilTestFailCount += stencilFail;
+         pTally->lateStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(LateDepthStencilInfoNullPS& event)
+ {
+     // Count one NullPS late Z/stencil event. A lane fails when it is covered but its pass
+     // bit is clear, so the pass mask is inverted bitwise (~); logical ! reduced the mask to 0 or 1.
+     const uint32_t zPass = _mm_popcnt_u32(event.data.depthPassMask);
+     const uint32_t zFail = _mm_popcnt_u32((~event.data.depthPassMask) & event.data.coverageMask);
+     const uint32_t stencilPass = _mm_popcnt_u32(event.data.stencilPassMask);
+     const uint32_t stencilFail = _mm_popcnt_u32((~event.data.stencilPassMask) & event.data.coverageMask);
+     // Add only this event's deltas: feeding DSNullPS's running totals into DSOmZ double counted.
+     DepthStencilStats* const tallies[] = { &DSNullPS, &DSOmZ };
+     for (DepthStencilStats* pTally : tallies)
+     {
+         pTally->lateZTestPassCount += zPass;
+         pTally->lateZTestFailCount += zFail;
+         pTally->lateZTestCount += zPass + zFail;
+         pTally->lateStencilTestPassCount += stencilPass;
+         pTally->lateStencilTestFailCount += stencilFail;
+         pTally->lateStencilTestCount += stencilPass + stencilFail;
+     }
+ }
+
+ virtual void Handle(EarlyDepthInfoPixelRate& event)
+ {
+     // Pixel-rate early Z: add this event's deltas only; feeding DSPixelRate's running totals into DSOmZ double counted.
+     const uint32_t tested = _mm_popcnt_u32(event.data.activeLanes);
+     const uint32_t passed = static_cast<uint32_t>(event.data.depthPassCount);
+     DSPixelRate.earlyZTestCount += tested;
+     DSPixelRate.earlyZTestPassCount += passed;
+     DSPixelRate.earlyZTestFailCount += tested - passed;
+     DSOmZ.earlyZTestCount += tested;
+     DSOmZ.earlyZTestPassCount += passed;
+     DSOmZ.earlyZTestFailCount += tested - passed;
+ }
+
+
+ virtual void Handle(LateDepthInfoPixelRate& event)
+ {
+     // Pixel-rate late Z: depthPassCount of the lanes set in activeLanes passed, the rest failed.
+     // Add this event's deltas only; feeding DSPixelRate's running totals into DSOmZ double counted.
+     const uint32_t tested = _mm_popcnt_u32(event.data.activeLanes);
+     const uint32_t passed = static_cast<uint32_t>(event.data.depthPassCount);
+     DSPixelRate.lateZTestCount += tested;
+     DSPixelRate.lateZTestPassCount += passed;
+     DSPixelRate.lateZTestFailCount += tested - passed;
+     DSOmZ.lateZTestCount += tested;
+     DSOmZ.lateZTestPassCount += passed;
+     DSOmZ.lateZTestFailCount += tested - passed;
+ }
+
+
+ virtual void Handle(BackendDrawEndEvent& event)
+ {
+     // Backend finished a draw: hand one summary event per tally to EventHandlerFile::Handle, each
+     // tagged with the draw id, then zero every depth/stencil tally so the next draw starts clean.
+     const auto drawId = event.data.drawId;
+
+     // Single-sample tally
+     EventHandlerFile::Handle(EarlyZSingleSample(drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
+     EventHandlerFile::Handle(LateZSingleSample(drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
+     EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
+     EventHandlerFile::Handle(LateStencilSingleSample(drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
+
+     // Sample-rate tally
+     EventHandlerFile::Handle(EarlyZSampleRate(drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
+     EventHandlerFile::Handle(LateZSampleRate(drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
+     EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
+     EventHandlerFile::Handle(LateStencilSampleRate(drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
+
+     // Pixel-rate tally (depth only)
+     EventHandlerFile::Handle(EarlyZPixelRate(drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
+     EventHandlerFile::Handle(LateZPixelRate(drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
+
+     // NullPS tally (only the early results are emitted)
+     EventHandlerFile::Handle(EarlyZNullPS(drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
+     EventHandlerFile::Handle(EarlyStencilNullPS(drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
+
+     // OmZ tally (combined totals)
+     EventHandlerFile::Handle(EarlyOmZ(drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
+     EventHandlerFile::Handle(EarlyOmStencil(drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
+     EventHandlerFile::Handle(LateOmZ(drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
+     EventHandlerFile::Handle(LateOmStencil(drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
+
+     // Reset internal counters: zero one tally, then copy the zeroed state into the others.
+     DSOmZ = DepthStencilStats();
+     DSSingleSample = DSSampleRate = DSPixelRate = DSNullPS = DSOmZ;
+ }
+
+ virtual void Handle(FrontendDrawEndEvent& event)
+ {
+     // Frontend finished a draw: emit the clipper, tessellation and GS tallies for it, then zero them.
+     const auto drawId = event.data.drawId;
+
+     // Clipper
+     EventHandlerFile::Handle(VertsClipped(drawId, CS.clippedVerts));
+     // Tessellator
+     EventHandlerFile::Handle(TessPrims(drawId, TS.inputPrims));
+     // Geometry shader
+     EventHandlerFile::Handle(GSInputPrims(drawId, GS.inputPrimCount));
+     EventHandlerFile::Handle(GSPrimsGen(drawId, GS.primGeneratedCount));
+     EventHandlerFile::Handle(GSVertsInput(drawId, GS.vertsInput));
+     // Reset internal counters
+     CS = CStats();
+     TS = TEStats();
+     GS = GSStats();
+ }
+
+ virtual void Handle(GSPrimInfo& event)
+ {
+     GS.vertsInput         += event.data.vertsInput;          // all three tallies run until FrontendDrawEndEvent resets GS
+     GS.primGeneratedCount += event.data.primGeneratedCount;
+     GS.inputPrimCount     += event.data.inputPrimCount;
+ }
+
+ virtual void Handle(ClipVertexCount& event)
+ {
+     CS.clippedVerts += (event.data.vertsPerPrim * _mm_popcnt_u32(event.data.primMask)); // verts per prim x prims flagged in primMask
+ }
+
+ virtual void Handle(TessPrimCount& event)
+ {
+ TS.inputPrims += event.data.primCount; // running total; emitted as TessPrims and reset on FrontendDrawEndEvent
+ }
};
static EventManager* FromHandle(HANDLE hThreadContext)
@@ -68,13 +341,11 @@ namespace ArchRast
if (type == AR_THREAD::API)
{
- ThreadStartApiEvent e;
- pManager->Dispatch(e);
+ pHandler->Handle(ThreadStartApiEvent());
}
else
{
- ThreadStartWorkerEvent e;
- pManager->Dispatch(e);
+ pHandler->Handle(ThreadStartWorkerEvent());
}
pHandler->MarkHeader();
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 107d7a3..95cb79b 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -170,3 +170,258 @@ event BackendStatsEvent
uint64_t CsInvocations;
};
+// Raw depth/stencil masks. Fields reordered to match the AR_EVENT call sites (depth, coverage, stencil). NOTE(review): assumes generated ctors take args in field order.
+event EarlyDepthStencilInfoSingleSample
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+
+event EarlyDepthStencilInfoSampleRate
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+
+event EarlyDepthStencilInfoNullPS
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+
+event LateDepthStencilInfoSingleSample
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+
+event LateDepthStencilInfoSampleRate
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+
+event LateDepthStencilInfoNullPS
+{
+ uint64_t depthPassMask;
+ uint64_t coverageMask;
+ uint64_t stencilPassMask;
+};
+// Raw pixel-rate early depth result: depthPassCount lanes passed out of the lanes set in activeLanes.
+event EarlyDepthInfoPixelRate
+{
+ uint64_t depthPassCount;
+ uint64_t activeLanes;
+};
+
+// Late depth test counterpart of EarlyDepthInfoPixelRate (same fields).
+event LateDepthInfoPixelRate
+{
+ uint64_t depthPassCount;
+ uint64_t activeLanes;
+};
+
+// Draw-end markers: on these the stats handler emits its per-draw summary events and resets its counters.
+event BackendDrawEndEvent
+{
+ uint32_t drawId;
+};
+
+event FrontendDrawEndEvent
+{
+ uint32_t drawId;
+};
+// Per-draw depth/stencil summaries emitted by the stats handler on BackendDrawEndEvent; testCount is the handler's pass + fail tally.
+event EarlyZSingleSample
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateZSingleSample
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyStencilSingleSample
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateStencilSingleSample
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyZSampleRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateZSampleRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyStencilSampleRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateStencilSampleRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyZNullPS
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyStencilNullPS
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyZPixelRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateZPixelRate
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+// Om* summaries carry the stats handler's combined (DSOmZ) totals rather than a single backend's tally.
+event EarlyOmZ
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event EarlyOmStencil
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateOmZ
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+
+event LateOmStencil
+{
+ uint32_t drawId;
+ uint64_t passCount;
+ uint64_t failCount;
+ uint64_t testCount;
+};
+// Geometry shader: GSPrimInfo is the raw event; GSInputPrims/GSPrimsGen/GSVertsInput are the per-draw sums emitted on FrontendDrawEndEvent.
+event GSPrimInfo
+{
+ uint64_t inputPrimCount;
+ uint64_t primGeneratedCount;
+ uint64_t vertsInput;
+};
+
+event GSInputPrims
+{
+ uint32_t drawId;
+ uint64_t inputPrimCount;
+};
+
+event GSPrimsGen
+{
+ uint32_t drawId;
+ uint64_t primGeneratedCount;
+};
+
+event GSVertsInput
+{
+ uint32_t drawId;
+ uint64_t vertsInput;
+};
+// Raw clipper event: the stats handler adds popcount(primMask) * vertsPerPrim to its clipped-vertex tally.
+event ClipVertexCount
+{
+ uint64_t vertsPerPrim;
+ uint64_t primMask;
+};
+
+// TODO: remove and replace. NOTE(review): no handler in this patch's archrast.cpp consumes FlushVertClip.
+event FlushVertClip
+{
+ uint32_t drawId;
+};
+
+event VertsClipped
+{
+ uint32_t drawId;
+ uint64_t clipCount;
+};
+
+event TessPrimCount
+{
+ uint64_t primCount;
+};
+
+// TODO: remove and replace. NOTE(review): no handler in this patch's archrast.cpp consumes TessPrimFlush.
+event TessPrimFlush
+{
+ uint32_t drawId;
+};
+
+event TessPrims
+{
+ uint32_t drawId;
+ uint64_t primCount;
+};
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index c5e6b98..16c4537 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -533,6 +533,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
AR_END(BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
@@ -565,6 +566,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
+ AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
AR_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
@@ -742,6 +744,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
AR_END(BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
@@ -775,6 +778,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
AR_BEGIN(BELateDepthTest, pDC->drawId);
depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+ AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
AR_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
@@ -923,6 +927,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+ AR_EVENT(EarlyDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, go to next tile
@@ -956,6 +961,7 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
+ AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
// if we have no covered samples that passed depth at this point, skip OM and go to next tile
@@ -1134,6 +1140,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
AR_BEGIN(BEEarlyDepthTest, pDC->drawId);
simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(vCoverageMask), _simd_movemask_ps(stencilPassMask)));
DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
AR_END(BEEarlyDepthTest, 0);
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index a208a36..c8dce10 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -853,7 +853,7 @@ static void GeometryShaderStage(
// update GS pipeline stats
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
-
+ AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims));
AR_END(FEGeometryShader, 1);
}
@@ -1027,6 +1027,7 @@ static void TessellationStages(
SWR_TS_TESSELLATED_DATA tsData = { 0 };
AR_BEGIN(FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
+ AR_EVENT(TessPrimCount(1));
AR_END(FETessellation, 0);
if (tsData.NumPrimitives == 0)
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index f7730ff..ee12612 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -581,6 +581,7 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
));
+ AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
}
diff --git a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
index 95c5442..abde3c0 100644
--- a/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
+++ b/src/gallium/drivers/swr/rasterizer/scripts/templates/ar_eventhandler_h.template
@@ -43,7 +43,7 @@ namespace ArchRast
virtual ~EventHandler() {}
% for name in protos['event_names']:
- virtual void Handle(${name}& event) {}
+ virtual void Handle(${name}&& event) {}
% endfor
};
}
--
2.7.4
More information about the mesa-dev
mailing list