[Mesa-dev] [PATCH 09/11] swr: [rasterizer core] Put DRAW_CONTEXT on a diet
Tim Rowley
timothy.o.rowley at intel.com
Sat Apr 2 00:22:56 UTC 2016
No need for 256 pointers per DC.
---
src/gallium/drivers/swr/rasterizer/core/api.cpp | 31 +++++++++++++++-------
.../drivers/swr/rasterizer/core/backend.cpp | 8 +++---
src/gallium/drivers/swr/rasterizer/core/backend.h | 2 +-
src/gallium/drivers/swr/rasterizer/core/context.h | 16 ++++++-----
.../drivers/swr/rasterizer/core/threads.cpp | 8 ++++--
.../drivers/swr/rasterizer/core/tilemgr.cpp | 21 ---------------
src/gallium/drivers/swr/rasterizer/core/tilemgr.h | 6 -----
7 files changed, 43 insertions(+), 49 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 665b6c0..d0738a7 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -29,6 +29,7 @@
#include <cfloat>
#include <cmath>
#include <cstdio>
+#include <new>
#include "core/api.h"
#include "core/backend.h"
@@ -65,11 +66,14 @@ HANDLE SwrCreateContext(
pContext->dcRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
pContext->dsRing.Init(KNOB_MAX_DRAWS_IN_FLIGHT);
+ pContext->pMacroTileManagerArray = (MacroTileMgr*)_aligned_malloc(sizeof(MacroTileMgr) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
+ pContext->pDispatchQueueArray = (DispatchQueue*)_aligned_malloc(sizeof(DispatchQueue) * KNOB_MAX_DRAWS_IN_FLIGHT, 64);
+
for (uint32_t dc = 0; dc < KNOB_MAX_DRAWS_IN_FLIGHT; ++dc)
{
pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
- pContext->dcRing[dc].pTileMgr = new MacroTileMgr(*(pContext->dcRing[dc].pArena));
- pContext->dcRing[dc].pDispatch = new DispatchQueue(); /// @todo Could lazily allocate this if Dispatch seen.
+ new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena);
+ new (&pContext->pDispatchQueueArray[dc]) DispatchQueue();
pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator);
}
@@ -143,10 +147,13 @@ void SwrDestroyContext(HANDLE hContext)
{
delete pContext->dcRing[i].pArena;
delete pContext->dsRing[i].pArena;
- delete(pContext->dcRing[i].pTileMgr);
- delete(pContext->dcRing[i].pDispatch);
+ pContext->pMacroTileManagerArray[i].~MacroTileMgr();
+ pContext->pDispatchQueueArray[i].~DispatchQueue();
}
+ _aligned_free(pContext->pDispatchQueueArray);
+ _aligned_free(pContext->pMacroTileManagerArray);
+
// Free scratch space.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
@@ -176,6 +183,15 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
template<bool IsDraw>
void QueueWork(SWR_CONTEXT *pContext)
{
+ DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
+ uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
+
+ if (IsDraw)
+ {
+ pDC->pTileMgr = &pContext->pMacroTileManagerArray[dcIndex];
+ pDC->pTileMgr->initialize();
+ }
+
// Each worker thread looks at a DC for both FE and BE work at different times and so we
// multiply threadDone by 2. When the threadDone counter has reached 0 then all workers
// have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and
@@ -299,8 +315,6 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
pCurDrawContext->FeLock = 0;
pCurDrawContext->threadsDone = 0;
- pCurDrawContext->pTileMgr->initialize();
-
// Assign unique drawId for this DC
pCurDrawContext->drawId = pContext->dcRing.GetHead();
@@ -1368,9 +1382,6 @@ void SwrDispatch(
pDC->isCompute = true; // This is a compute context.
- // Ensure spill fill pointers are initialized to nullptr.
- memset(pDC->pSpillFill, 0, sizeof(pDC->pSpillFill));
-
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
pTaskData->threadGroupCountX = threadGroupCountX;
@@ -1378,6 +1389,8 @@ void SwrDispatch(
pTaskData->threadGroupCountZ = threadGroupCountZ;
uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
+ uint32_t dcIndex = pDC->drawId % KNOB_MAX_DRAWS_IN_FLIGHT;
+ pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
pDC->pDispatch->initialize(totalThreadGroups, pTaskData);
QueueDispatch(pContext);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index 842ea32..b2d3d9e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -70,7 +70,7 @@ static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId)
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer)
{
RDTSC_START(BEDispatch);
@@ -80,10 +80,10 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
SWR_ASSERT(pTaskData != nullptr);
// Ensure spill fill memory has been allocated.
- if (pDC->pSpillFill[workerId] == nullptr)
+ if (pSpillFillBuffer == nullptr)
{
///@todo Add state which indicates the spill fill size.
- pDC->pSpillFill[workerId] = (uint8_t*)pDC->pArena->AllocAlignedSync(4 * sizeof(MEGABYTE), sizeof(float) * 8);
+ pSpillFillBuffer = pDC->pArena->AllocAlignedSync(4 * sizeof(MEGABYTE), sizeof(float) * 8);
}
const API_STATE& state = GetApiState(pDC);
@@ -94,7 +94,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
csContext.pTGSM = pContext->pScratch[workerId];
- csContext.pSpillFillBuffer = pDC->pSpillFill[workerId];
+ csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
state.pfnCsFunc(GetPrivateState(pDC), &csContext);
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h
index 2fa1895..d0626b9 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -32,7 +32,7 @@
#include "core/context.h"
#include "core/multisample.h"
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId);
+void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer);
void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessQueryStatsBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 2c28286..660c86e 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -384,8 +384,11 @@ struct DRAW_CONTEXT
{
SWR_CONTEXT* pContext;
uint64_t drawId;
- MacroTileMgr* pTileMgr;
- DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
+ union
+ {
+ MacroTileMgr* pTileMgr;
+ DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
+ };
uint64_t dependency;
DRAW_STATE* pState;
CachingArena* pArena;
@@ -394,12 +397,10 @@ struct DRAW_CONTEXT
bool cleanupState; // True if this is the last draw using an entry in the state ring.
volatile bool doneFE; // Is FE work done for this draw?
+ FE_WORK FeWork;
+
volatile OSALIGNLINE(uint32_t) FeLock;
volatile int64_t threadsDone;
-
- OSALIGNLINE(FE_WORK) FeWork;
- uint8_t* pSpillFill[KNOB_MAX_NUM_THREADS]; // Scratch space used for spill fills.
-
};
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
@@ -445,6 +446,9 @@ struct SWR_CONTEXT
DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
+ MacroTileMgr* pMacroTileManagerArray;
+ DispatchQueue* pDispatchQueueArray;
+
// Draw State Ring
// When draw are very large (lots of primitives) then the API thread will break these up.
// These split draws all have identical state. So instead of storing the state directly
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
index bee1e13..4b7a207 100644
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -291,7 +291,10 @@ INLINE int64_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
{
// Cleanup memory allocations
pDC->pArena->Reset(true);
- pDC->pTileMgr->initialize();
+ if (!pDC->isCompute)
+ {
+ pDC->pTileMgr->initialize();
+ }
if (pDC->cleanupState)
{
pDC->pState->pArena->Reset(true);
@@ -546,10 +549,11 @@ void WorkOnCompute(
// Is there any work remaining?
if (queue.getNumQueued() > 0)
{
+ void* pSpillFillBuffer = nullptr;
uint32_t threadGroupId = 0;
while (queue.getWork(threadGroupId))
{
- ProcessComputeBE(pDC, workerId, threadGroupId);
+ ProcessComputeBE(pDC, workerId, threadGroupId, pSpillFillBuffer);
queue.finishedWork();
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
index 7945772..c053e27 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -35,27 +35,6 @@
#define TILE_ID(x,y) ((x << 16 | y))
-// override new/delete for alignment
-void *MacroTileMgr::operator new(size_t size)
-{
- return _aligned_malloc(size, 64);
-}
-
-void MacroTileMgr::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
-void* DispatchQueue::operator new(size_t size)
-{
- return _aligned_malloc(size, 64);
-}
-
-void DispatchQueue::operator delete(void *p)
-{
- _aligned_free(p);
-}
-
MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
{
}
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
index 34992aa..82a15e1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -140,9 +140,6 @@ public:
x = (tileID >> 16) & 0xffff;
}
- void *operator new(size_t size);
- void operator delete (void *p);
-
private:
CachingArena& mArena;
std::unordered_map<uint32_t, MacroTileQueue> mTiles;
@@ -229,9 +226,6 @@ public:
return mpTaskData;
}
- void *operator new(size_t size);
- void operator delete (void *p);
-
void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.
OSALIGNLINE(volatile LONG) mTasksAvailable{ 0 };
--
1.9.1
More information about the mesa-dev
mailing list