diff --git a/src/Common.h b/src/Common.h index f5954c5..ec564d6 100644 --- a/src/Common.h +++ b/src/Common.h @@ -66,6 +66,7 @@ typedef std::chrono::high_resolution_clock::duration duration; extern VkPhysicalDevice g_hPhysicalDevice; extern VkDevice g_hDevice; +extern VkInstance g_hVulkanInstance; extern VmaAllocator g_hAllocator; extern bool g_MemoryAliasingWarningEnabled; diff --git a/src/Tests.cpp b/src/Tests.cpp index 82100fd..2071fa5 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp @@ -2000,6 +2000,7 @@ void TestHeapSizeLimit() VmaAllocatorCreateInfo allocatorCreateInfo = {}; allocatorCreateInfo.physicalDevice = g_hPhysicalDevice; allocatorCreateInfo.device = g_hDevice; + allocatorCreateInfo.instance = g_hVulkanInstance; allocatorCreateInfo.pHeapSizeLimit = heapSizeLimit; VmaAllocator hAllocator; @@ -2016,8 +2017,8 @@ void TestHeapSizeLimit() VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - // 1. Allocate two blocks of Own Memory, half the size of BLOCK_SIZE. - VmaAllocationInfo ownAllocInfo; + // 1. Allocate two blocks of dedicated memory, half the size of BLOCK_SIZE. + VmaAllocationInfo dedicatedAllocInfo; { VmaAllocationCreateInfo allocCreateInfo = {}; allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; @@ -2028,7 +2029,7 @@ void TestHeapSizeLimit() for(size_t i = 0; i < 2; ++i) { Item item; - res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &ownAllocInfo); + res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &dedicatedAllocInfo); TEST(res == VK_SUCCESS); items.push_back(item); } @@ -2036,7 +2037,7 @@ void TestHeapSizeLimit() // Create pool to make sure allocations must be out of this memory type. 
VmaPoolCreateInfo poolCreateInfo = {}; - poolCreateInfo.memoryTypeIndex = ownAllocInfo.memoryType; + poolCreateInfo.memoryTypeIndex = dedicatedAllocInfo.memoryType; poolCreateInfo.blockSize = BLOCK_SIZE; VmaPool hPool; @@ -3873,8 +3874,6 @@ static void TestBudget() { wprintf(L"Testing budget...\n"); - uint32_t memTypeIndex = UINT32_MAX; - static const VkDeviceSize BUF_SIZE = 100ull * 1024 * 1024; static const uint32_t BUF_COUNT = 4; @@ -3885,6 +3884,11 @@ static void TestBudget() VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {}; vmaGetBudget(g_hAllocator, budgetBeg); + for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i) + { + TEST(budgetBeg[i].allocationBytes <= budgetBeg[i].blockBytes); + } + VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; bufInfo.size = BUF_SIZE; bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; @@ -5234,7 +5238,7 @@ void Test() //PerformCustomPoolTest(file); fclose(file); - + wprintf(L"Done.\n"); } diff --git a/src/VmaUsage.h b/src/VmaUsage.h index 833ba14..ef6eb60 100644 --- a/src/VmaUsage.h +++ b/src/VmaUsage.h @@ -49,6 +49,7 @@ include all public interface declarations. Example: //#define VMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY 256 //#define VMA_USE_STL_SHARED_MUTEX 0 //#define VMA_DEBUG_GLOBAL_MUTEX 1 +//#define VMA_MEMORY_BUDGET 0 /* #define VMA_DEBUG_LOG(format, ...) 
do { \ printf(format, __VA_ARGS__); \ diff --git a/src/VulkanSample.cpp b/src/VulkanSample.cpp index 8a533a3..55ce032 100644 --- a/src/VulkanSample.cpp +++ b/src/VulkanSample.cpp @@ -43,6 +43,7 @@ static const bool USE_CUSTOM_CPU_ALLOCATION_CALLBACKS = true; VkPhysicalDevice g_hPhysicalDevice; VkDevice g_hDevice; VmaAllocator g_hAllocator; +VkInstance g_hVulkanInstance; bool g_MemoryAliasingWarningEnabled = true; static bool g_EnableValidationLayer = true; @@ -56,7 +57,6 @@ bool g_SparseBindingEnabled = false; static HINSTANCE g_hAppInstance; static HWND g_hWnd; static LONG g_SizeX = 1280, g_SizeY = 720; -static VkInstance g_hVulkanInstance; static VkSurfaceKHR g_hSurface; static VkQueue g_hPresentQueue; static VkSurfaceFormatKHR g_SurfaceFormat; @@ -1369,10 +1369,12 @@ static void InitializeApplication() { allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT; } +#if !defined(VMA_MEMORY_BUDGET) || VMA_MEMORY_BUDGET == 1 if(VK_EXT_memory_budget_enabled && VK_KHR_get_physical_device_properties2_enabled) { allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; } +#endif if(USE_CUSTOM_CPU_ALLOCATION_CALLBACKS) { diff --git a/src/vk_mem_alloc.h b/src/vk_mem_alloc.h index 6e61577..08e2e62 100644 --- a/src/vk_mem_alloc.h +++ b/src/vk_mem_alloc.h @@ -2154,9 +2154,12 @@ typedef struct VmaBudget /** \brief Sum size of all allocations created in particular heap, in bytes. - Always less or equal than `blockBytes`. + Usually less than or equal to `blockBytes`. Difference `blockBytes - allocationBytes` is the amount of memory allocated but unused - available for new allocations or wasted due to fragmentation. + + It might be greater than `blockBytes` if there are some allocations in lost state, as they count + toward this value as well. 
*/ VkDeviceSize allocationBytes; @@ -5284,6 +5287,7 @@ public: { m_Alignment = 1; m_Size = 0; + m_MemoryTypeIndex = 0; m_pUserData = VMA_NULL; m_LastUseFrameIndex = currentFrameIndex; m_Type = (uint8_t)ALLOCATION_TYPE_NONE; @@ -5310,6 +5314,7 @@ public: VkDeviceSize offset, VkDeviceSize alignment, VkDeviceSize size, + uint32_t memoryTypeIndex, VmaSuballocationType suballocationType, bool mapped, bool canBecomeLost) @@ -5319,6 +5324,7 @@ public: m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; m_Alignment = alignment; m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; m_MapCount = mapped ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; m_SuballocationType = (uint8_t)suballocationType; m_BlockAllocation.m_Block = block; @@ -5331,6 +5337,7 @@ public: VMA_ASSERT(m_Type == ALLOCATION_TYPE_NONE); VMA_ASSERT(m_LastUseFrameIndex.load() == VMA_FRAME_INDEX_LOST); m_Type = (uint8_t)ALLOCATION_TYPE_BLOCK; + m_MemoryTypeIndex = 0; m_BlockAllocation.m_Block = VMA_NULL; m_BlockAllocation.m_Offset = 0; m_BlockAllocation.m_CanBecomeLost = true; @@ -5356,9 +5363,9 @@ public: m_Type = (uint8_t)ALLOCATION_TYPE_DEDICATED; m_Alignment = 0; m_Size = size; + m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; m_MapCount = (pMappedData != VMA_NULL) ? 
MAP_COUNT_FLAG_PERSISTENT_MAP : 0; - m_DedicatedAllocation.m_MemoryTypeIndex = memoryTypeIndex; m_DedicatedAllocation.m_hMemory = hMemory; m_DedicatedAllocation.m_pMappedData = pMappedData; } @@ -5378,7 +5385,7 @@ public: } VkDeviceSize GetOffset() const; VkDeviceMemory GetMemory() const; - uint32_t GetMemoryTypeIndex() const; + uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } void* GetMappedData() const; bool CanBecomeLost() const; @@ -5437,6 +5444,7 @@ private: VkDeviceSize m_Size; void* m_pUserData; VMA_ATOMIC_UINT32 m_LastUseFrameIndex; + uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. @@ -5455,7 +5463,6 @@ private: // Allocation for an object that has its own private VkDeviceMemory. struct DedicatedAllocation { - uint32_t m_MemoryTypeIndex; VkDeviceMemory m_hMemory; void* m_pMappedData; // Not null means memory is mapped. 
}; @@ -6964,6 +6971,23 @@ struct VmaCurrentBudgetData #if VMA_MEMORY_BUDGET m_OperationsSinceBudgetFetch = 0; +#endif + } + + void AddAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) + { + m_AllocationBytes[heapIndex] += allocationSize; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; +#endif + } + + void RemoveAllocation(uint32_t heapIndex, VkDeviceSize allocationSize) + { + VMA_ASSERT(m_AllocationBytes[heapIndex] >= allocationSize); // DELME + m_AllocationBytes[heapIndex] -= allocationSize; +#if VMA_MEMORY_BUDGET + ++m_OperationsSinceBudgetFetch; #endif } }; @@ -7715,20 +7739,6 @@ VkDeviceMemory VmaAllocation_T::GetMemory() const } } -uint32_t VmaAllocation_T::GetMemoryTypeIndex() const -{ - switch(m_Type) - { - case ALLOCATION_TYPE_BLOCK: - return m_BlockAllocation.m_Block->GetMemoryTypeIndex(); - case ALLOCATION_TYPE_DEDICATED: - return m_DedicatedAllocation.m_MemoryTypeIndex; - default: - VMA_ASSERT(0); - return UINT32_MAX; - } -} - void* VmaAllocation_T::GetMappedData() const { switch(m_Type) @@ -11840,10 +11850,11 @@ VkResult VmaBlockVector::AllocatePage( freeMemory = (heapBudget.usage < heapBudget.budget) ? (heapBudget.budget - heapBudget.usage) : 0; } + const bool canFallbackToDedicated = !IsCustomPool(); const bool canCreateNewBlock = ((createInfo.flags & VMA_ALLOCATION_CREATE_NEVER_ALLOCATE_BIT) == 0) && (m_Blocks.size() < m_MaxBlockCount) && - freeMemory >= size; + (freeMemory >= size || !canFallbackToDedicated); uint32_t strategy = createInfo.flags & VMA_ALLOCATION_CREATE_STRATEGY_MASK; // If linearAlgorithm is used, canMakeOtherLost is available only when used as ring buffer. @@ -11995,7 +12006,7 @@ VkResult VmaBlockVector::AllocatePage( } size_t newBlockIndex = 0; - VkResult res = newBlockSize <= freeMemory ? + VkResult res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; // Allocation of this size failed? 
Try 1/2, 1/4, 1/8 of m_PreferredBlockSize. if(!m_ExplicitBlockSize) @@ -12007,7 +12018,7 @@ VkResult VmaBlockVector::AllocatePage( { newBlockSize = smallerNewBlockSize; ++newBlockSizeShift; - res = newBlockSize <= freeMemory ? + res = (newBlockSize <= freeMemory || !canFallbackToDedicated) ? CreateBlock(newBlockSize, &newBlockIndex) : VK_ERROR_OUT_OF_DEVICE_MEMORY; } else @@ -12162,14 +12173,14 @@ VkResult VmaBlockVector::AllocatePage( bestRequest.offset, alignment, size, + m_MemoryTypeIndex, suballocType, mapped, (createInfo.flags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); VMA_HEAVY_ASSERT(pBestRequestBlock->Validate()); VMA_DEBUG_LOG(" Returned from existing block"); (*pAllocation)->SetUserData(m_hAllocator, createInfo.pUserData); - m_hAllocator->m_Budget.m_AllocationBytes[m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex)] += size; - ++m_hAllocator->m_Budget.m_OperationsSinceBudgetFetch; + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -12376,13 +12387,13 @@ VkResult VmaBlockVector::AllocateFromBlock( currRequest.offset, alignment, size, + m_MemoryTypeIndex, suballocType, mapped, (allocFlags & VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT) != 0); VMA_HEAVY_ASSERT(pBlock->Validate()); (*pAllocation)->SetUserData(m_hAllocator, pUserData); - m_hAllocator->m_Budget.m_AllocationBytes[m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex)] += size; - ++m_hAllocator->m_Budget.m_OperationsSinceBudgetFetch; + m_hAllocator->m_Budget.AddAllocation(m_hAllocator->MemoryTypeIndexToHeapIndex(m_MemoryTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { m_hAllocator->FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -14987,9 +14998,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( */ FreeVulkanMemory(memTypeIndex, currAlloc->GetSize(), hMemory); - const 
uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - m_Budget.m_AllocationBytes[heapIndex] -= currAlloc->GetSize(); - ++m_Budget.m_OperationsSinceBudgetFetch; + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), currAlloc->GetSize()); currAlloc->SetUserData(this, VMA_NULL); currAlloc->Dtor(); m_AllocationObjectAllocator.Free(currAlloc); @@ -15041,9 +15050,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( (*pAllocation)->Ctor(m_CurrentFrameIndex.load(), isUserDataString); (*pAllocation)->InitDedicatedAllocation(memTypeIndex, hMemory, suballocType, pMappedData, size); (*pAllocation)->SetUserData(this, pUserData); - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memTypeIndex); - m_Budget.m_AllocationBytes[heapIndex] += size; - ++m_Budget.m_OperationsSinceBudgetFetch; + m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); if(VMA_DEBUG_INITIALIZE_ALLOCATIONS) { FillAllocation(*pAllocation, VMA_ALLOCATION_FILL_PATTERN_CREATED); @@ -15311,11 +15318,8 @@ void VmaAllocator_T::FreeMemory( } } - if(allocation->GetLastUseFrameIndex() != VMA_FRAME_INDEX_LOST) - { - m_Budget.m_AllocationBytes[MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex())] -= allocation->GetSize(); - ++m_Budget.m_OperationsSinceBudgetFetch; - } + // Do this regardless of whether the allocation is lost. Lost allocations still count toward Budget.AllocationBytes. + m_Budget.RemoveAllocation(MemoryTypeIndexToHeapIndex(allocation->GetMemoryTypeIndex()), allocation->GetSize()); allocation->SetUserData(this, VMA_NULL); allocation->Dtor(); m_AllocationObjectAllocator.Free(allocation); @@ -15748,7 +15752,7 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(pAllocateInfo->memoryTypeIndex); // HeapSizeLimit is in effect for this heap. 
- if((m_HeapSizeLimitMask | (1u << heapIndex)) != 0) + if((m_HeapSizeLimitMask & (1u << heapIndex)) != 0) { const VkDeviceSize heapSize = m_MemProps.memoryHeaps[heapIndex].size; VkDeviceSize blockBytes = m_Budget.m_BlockBytes[heapIndex]; @@ -15775,7 +15779,9 @@ VkResult VmaAllocator_T::AllocateVulkanMemory(const VkMemoryAllocateInfo* pAlloc if(res == VK_SUCCESS) { +#if VMA_MEMORY_BUDGET ++m_Budget.m_OperationsSinceBudgetFetch; +#endif // Informative callback. if(m_DeviceMemoryCallbacks.pfnAllocate != VMA_NULL) @@ -15802,9 +15808,7 @@ void VmaAllocator_T::FreeVulkanMemory(uint32_t memoryType, VkDeviceSize size, Vk // VULKAN CALL vkFreeMemory. (*m_VulkanFunctions.vkFreeMemory)(m_hDevice, hMemory, GetAllocationCallbacks()); - const uint32_t heapIndex = MemoryTypeIndexToHeapIndex(memoryType); - m_Budget.m_BlockBytes[heapIndex] -= size; - ++m_Budget.m_OperationsSinceBudgetFetch; + m_Budget.m_BlockBytes[MemoryTypeIndexToHeapIndex(memoryType)] -= size; } VkResult VmaAllocator_T::BindVulkanBuffer(