[Mesa-dev] [PATCH 04/11] swr: [rasterizer core] Fix global arena allocator bug
Tim Rowley
timothy.o.rowley at intel.com
Thu Apr 14 19:53:11 UTC 2016
- Plus some minor code refactoring
---
src/gallium/drivers/swr/rasterizer/core/arena.h | 88 ++++++++++++----------
.../drivers/swr/rasterizer/core/backend.cpp | 5 +-
2 files changed, 51 insertions(+), 42 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h
index 64184e1..d0c0123 100644
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -37,47 +37,54 @@
#include <atomic>
#include "core/utils.h"
+static const size_t ARENA_BLOCK_ALIGN = 64;
+
+struct ArenaBlock
+{
+ size_t blockSize = 0;
+ ArenaBlock* pNext = nullptr;
+};
+static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
+ "Increase BLOCK_ALIGN size");
+
class DefaultAllocator
{
public:
- void* AllocateAligned(size_t size, size_t align)
+ ArenaBlock* AllocateAligned(size_t size, size_t align)
{
- void* p = _aligned_malloc(size, align);
+ SWR_ASSERT(size >= sizeof(ArenaBlock));
+
+ ArenaBlock* p = new (_aligned_malloc(size, align)) ArenaBlock();
+ p->blockSize = size;
return p;
}
- void Free(void* pMem)
+
+ void Free(ArenaBlock* pMem)
{
- _aligned_free(pMem);
+ if (pMem)
+ {
+ SWR_ASSERT(pMem->blockSize < size_t(0xdddddddd));
+ _aligned_free(pMem);
+ }
}
};
-static const size_t ARENA_BLOCK_ALIGN = 64;
-
-struct ArenaBlock
-{
- size_t blockSize = 0;
- ArenaBlock* pNext = nullptr;
-};
-static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
- "Increase BLOCK_ALIGN size");
-
// Caching Allocator for Arena
-template<uint32_t NumBucketsT = 4, uint32_t StartBucketBitT = 16>
+template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
- void* AllocateAligned(size_t size, size_t align)
+ ArenaBlock* AllocateAligned(size_t size, size_t align)
{
SWR_ASSERT(size >= sizeof(ArenaBlock));
SWR_ASSERT(size <= uint32_t(-1));
- size_t blockSize = size - ARENA_BLOCK_ALIGN;
- uint32_t bucket = GetBucketId(blockSize);
+ uint32_t bucket = GetBucketId(size);
{
// search cached blocks
std::lock_guard<std::mutex> l(m_mutex);
ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
- ArenaBlock* pBlock = SearchBlocks(pPrevBlock, blockSize, align);
+ ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
@@ -89,15 +96,15 @@ struct CachingAllocatorT : DefaultAllocator
}
else
{
- pPrevBlock = &m_oldCachedBlocks[GetBucketId(blockSize)];
- pBlock = SearchBlocks(pPrevBlock, blockSize, align);
+ pPrevBlock = &m_oldCachedBlocks[bucket];
+ pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
m_oldCachedSize -= pBlock->blockSize;
if (pBlock == m_pOldLastCachedBlocks[bucket])
{
- m_pLastCachedBlocks[bucket] = pPrevBlock;
+ m_pOldLastCachedBlocks[bucket] = pPrevBlock;
}
}
}
@@ -126,15 +133,14 @@ struct CachingAllocatorT : DefaultAllocator
return this->DefaultAllocator::AllocateAligned(size, align);
}
- void Free(void* pMem)
+ void Free(ArenaBlock* pMem)
{
if (pMem)
{
- ArenaBlock* pNewBlock = reinterpret_cast<ArenaBlock*>(pMem);
- SWR_ASSERT(pNewBlock->blockSize >= 0);
+ SWR_ASSERT(pMem->blockSize >= 0);
std::unique_lock<std::mutex> l(m_mutex);
- InsertCachedBlock(GetBucketId(pNewBlock->blockSize), pNewBlock);
+ InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
}
}
@@ -154,7 +160,7 @@ struct CachingAllocatorT : DefaultAllocator
{
ArenaBlock* pNext = pBlock->pNext;
m_oldCachedSize -= pBlock->blockSize;
- m_totalAllocated -= (pBlock->blockSize + ARENA_BLOCK_ALIGN);
+ m_totalAllocated -= pBlock->blockSize;
this->DefaultAllocator::Free(pBlock);
pBlock = pNext;
}
@@ -175,6 +181,11 @@ struct CachingAllocatorT : DefaultAllocator
}
}
+ if (doFree)
+ {
+ SWR_REL_ASSERT(m_oldCachedSize == 0, "Unknown whereabouts of 0x%llx bytes", (uint64_t)m_oldCachedSize);
+ }
+
m_oldCachedSize += m_cachedSize;
m_cachedSize = 0;
}
@@ -304,7 +315,7 @@ private:
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
- static const size_t MAX_UNUSED_SIZE = 20 * sizeof(MEGABYTE);
+ static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
@@ -346,7 +357,7 @@ public:
if ((offset + size) <= pCurBlock->blockSize)
{
- void* pMem = PtrAdd(pCurBlock, offset + ARENA_BLOCK_ALIGN);
+ void* pMem = PtrAdd(pCurBlock, offset);
m_offset = offset + size;
return pMem;
}
@@ -355,24 +366,21 @@ public:
// a new block
}
- static const size_t ArenaBlockSize = BlockSizeT - ARENA_BLOCK_ALIGN;
- size_t blockSize = std::max(size, ArenaBlockSize);
+ static const size_t ArenaBlockSize = BlockSizeT;
+ size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
- void *pMem = m_allocator.AllocateAligned(blockSize + ARENA_BLOCK_ALIGN, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
- SWR_ASSERT(pMem != nullptr);
-
- ArenaBlock* pNewBlock = new (pMem) ArenaBlock();
+ ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
+ SWR_ASSERT(pNewBlock != nullptr);
if (pNewBlock != nullptr)
{
- m_offset = 0;
+ m_offset = ARENA_BLOCK_ALIGN;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
- m_pCurBlock->blockSize = blockSize;
}
return AllocAligned(size, align);
@@ -407,7 +415,7 @@ public:
void Reset(bool removeAll = false)
{
- m_offset = 0;
+ m_offset = ARENA_BLOCK_ALIGN;
if (m_pCurBlock)
{
@@ -431,13 +439,13 @@ public:
bool IsEmpty()
{
- return (m_pCurBlock == nullptr) || (m_offset == 0 && m_pCurBlock->pNext == nullptr);
+ return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
}
private:
ArenaBlock* m_pCurBlock = nullptr;
- size_t m_offset = 0;
+ size_t m_offset = ARENA_BLOCK_ALIGN;
/// @note Mutex is only used by sync allocation functions.
std::mutex m_mutex;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index d8ffdaa..a2212ba 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -80,9 +80,10 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
SWR_ASSERT(pTaskData != nullptr);
// Ensure spill fill memory has been allocated.
- if (pSpillFillBuffer == nullptr)
+ size_t spillFillSize = pDC->pState->state.totalSpillFillSize;
+ if (spillFillSize && pSpillFillBuffer == nullptr)
{
- pSpillFillBuffer = pDC->pArena->AllocAlignedSync(pDC->pState->state.totalSpillFillSize, sizeof(float) * 8);
+ pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
}
const API_STATE& state = GetApiState(pDC);
--
1.9.1
More information about the mesa-dev
mailing list