diff --git a/src/common/settings.h b/src/common/settings.h
index e02f1c48ff..7f4b637fe4 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -199,7 +199,9 @@ struct Values {
                                              MemoryLayout::Memory_4Gb,
                                              MemoryLayout::Memory_8Gb,
                                              "memory_layout_mode",
-                                             Category::Core};
+                                             Category::Core,
+                                             Specialization::Default,
+                                             false};
     SwitchableSetting<bool> use_speed_limit{
         linkage, true, "use_speed_limit", Category::Core, Specialization::Paired, false, true};
     SwitchableSetting<u16, true> speed_limit{linkage,
@@ -213,6 +215,11 @@ struct Values {
                                              true,
                                              &use_speed_limit};
     SwitchableSetting<bool> sync_core_speed{linkage, false, "sync_core_speed", Category::Core, Specialization::Default};
+    SwitchableSetting<bool> use_nce{linkage, true, "use_nce", Category::Core};
+
+    // Memory
+    SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::Core};
+    SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::Core};
 
     // Cpu
     SwitchableSetting<CpuBackend, true> cpu_backend{linkage,
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 48a64502ed..ccbcc2341f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -307,6 +307,8 @@ add_library(video_core STATIC
     vulkan_common/vulkan_library.h
     vulkan_common/vulkan_memory_allocator.cpp
     vulkan_common/vulkan_memory_allocator.h
+    vulkan_common/hybrid_memory.cpp
+    vulkan_common/hybrid_memory.h
     vulkan_common/vulkan_surface.cpp
     vulkan_common/vulkan_surface.h
     vulkan_common/vulkan_wrapper.cpp
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index e301dd7aef..c9f6f0836c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -34,6 +34,7 @@
 #include "video_core/vulkan_common/vulkan_instance.h"
 #include "video_core/vulkan_common/vulkan_library.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/hybrid_memory.h"
 #include "video_core/vulkan_common/vulkan_surface.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 #ifdef __ANDROID__
@@ -123,6 +124,7 @@ RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& emu_window,
                      PresentFiltersForAppletCapture),
       rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
                  scheduler),
+      hybrid_memory(std::make_unique<HybridMemory>(device, memory_allocator)),
       texture_manager(device, memory_allocator), shader_manager(device),
       applet_frame() {
@@ -131,6 +133,28 @@
         scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
     }
 
+    // Initialize HybridMemory system
+    if (Settings::values.use_gpu_memory_manager.GetValue()) {
+#if defined(__linux__) || defined(__ANDROID__)
+        try {
+            void* guest_memory_base = std::aligned_alloc(4096, 64 * 1024 * 1024);
+            if (guest_memory_base) {
+                try {
+                    hybrid_memory->InitializeGuestMemory(guest_memory_base, 64 * 1024 * 1024);
+                    LOG_INFO(Render_Vulkan,
+                             "HybridMemory initialized with {} MB of fault-managed memory", 64);
+                } catch (const std::exception& e) {
+                    std::free(guest_memory_base);
+                    throw;
+                }
+            }
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize HybridMemory: {}", e.what());
+        }
+#else
+        LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
+#endif
+    }
+
     // Initialize enhanced shader compilation system
     shader_manager.SetScheduler(&scheduler);
     LOG_INFO(Render_Vulkan, "Enhanced shader compilation system initialized");
compilation system initialized"); @@ -389,6 +413,35 @@ void RendererVulkan::RenderScreenshot(std::span return; } + // If memory snapshots are enabled, take a snapshot with the screenshot + if (Settings::values.enable_memory_snapshots.GetValue() && hybrid_memory) { + try { + const auto now = std::chrono::system_clock::now(); + const auto now_time_t = std::chrono::system_clock::to_time_t(now); + std::tm local_tm; +#ifdef _WIN32 + localtime_s(&local_tm, &now_time_t); +#else + localtime_r(&now_time_t, &local_tm); +#endif + char time_str[128]; + std::strftime(time_str, sizeof(time_str), "%Y%m%d_%H%M%S", &local_tm); + + std::string snapshot_path = fmt::format("snapshots/memory_snapshot_{}.bin", time_str); + hybrid_memory->SaveSnapshot(snapshot_path); + + // Also save a differential snapshot if there's been a previous snapshot + if (Settings::values.use_gpu_memory_manager.GetValue()) { + std::string diff_path = fmt::format("snapshots/diff_snapshot_{}.bin", time_str); + hybrid_memory->SaveDifferentialSnapshot(diff_path); + hybrid_memory->ResetDirtyTracking(); + LOG_INFO(Render_Vulkan, "Memory snapshots saved with screenshot"); + } + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Failed to save memory snapshot: {}", e.what()); + } + } + const auto& layout{renderer_settings.screenshot_framebuffer_layout}; const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM, layout.width * layout.height * 4); @@ -498,6 +551,23 @@ void RendererVulkan::InitializePlatformSpecific() { #else LOG_INFO(Render_Vulkan, "Platform-specific Vulkan initialization not implemented for this platform"); #endif + + // Create a compute buffer using the HybridMemory system if enabled + if (Settings::values.use_gpu_memory_manager.GetValue()) { + try { + // Create a small compute buffer for testing + const VkDeviceSize buffer_size = 1 * 1024 * 1024; // 1 MB + ComputeBuffer compute_buffer = hybrid_memory->CreateComputeBuffer( + buffer_size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + MemoryUsage::DeviceLocal); + + LOG_INFO(Render_Vulkan, "Successfully created compute buffer using HybridMemory"); + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Failed to create compute buffer: {}", e.what()); + } + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index e6efc887d1..983bf8c9b4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/hybrid_memory.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core::Memory { @@ -99,6 +100,9 @@ private: RasterizerVulkan rasterizer; std::optional turbo_mode; + // HybridMemory for advanced memory management + std::unique_ptr hybrid_memory; + // Enhanced texture and shader management TextureManager texture_manager; ShaderManager shader_manager; diff --git a/src/video_core/vulkan_common/hybrid_memory.cpp b/src/video_core/vulkan_common/hybrid_memory.cpp new file mode 100644 index 0000000000..bbc001b012 --- /dev/null +++ b/src/video_core/vulkan_common/hybrid_memory.cpp @@ -0,0 +1,332 @@ +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + 
+#include <algorithm>
+#include <cstring>
+#include <fstream>
+
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/hybrid_memory.h"
+
+#if defined(__linux__) || defined(__ANDROID__)
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+namespace Vulkan {
+
+void PredictiveReuseManager::RecordUsage(u64 address, u64 size, bool write_access) {
+    std::lock_guard guard(mutex);
+
+    // Add to history, removing oldest entries if we're past max_history
+    access_history.push_back({address, size, write_access, current_timestamp++});
+    if (access_history.size() > max_history) {
+        access_history.erase(access_history.begin());
+    }
+}
+
+bool PredictiveReuseManager::IsHotRegion(u64 address, u64 size) const {
+    std::lock_guard guard(mutex);
+
+    // Check if this memory region has been accessed frequently
+    const u64 end_address = address + size;
+    int access_count = 0;
+
+    for (const auto& access : access_history) {
+        const u64 access_end = access.address + access.size;
+
+        // Check for overlap
+        if (!(end_address <= access.address || address >= access_end)) {
+            access_count++;
+        }
+    }
+
+    // Consider a region "hot" if it has been accessed in at least 10% of recent accesses
+    return access_count >= static_cast<int>(std::max<size_t>(1, max_history / 10));
+}
+
+void PredictiveReuseManager::EvictRegion(u64 address, u64 size) {
+    std::lock_guard guard(mutex);
+
+    // Remove any history entries that overlap with this region
+    const u64 end_address = address + size;
+
+    access_history.erase(
+        std::remove_if(access_history.begin(), access_history.end(),
+                       [address, end_address](const MemoryAccess& access) {
+                           const u64 access_end = access.address + access.size;
+                           // Check for overlap
+                           return !(end_address <= access.address || address >= access_end);
+                       }),
+        access_history.end());
+}
+
+void PredictiveReuseManager::ClearHistory() {
+    std::lock_guard guard(mutex);
+    access_history.clear();
+    current_timestamp = 0;
+}
+
+#if defined(__linux__) || defined(__ANDROID__)
+void FaultManagedAllocator::Initialize(void* base, size_t size) {
+    uffd = static_cast<int>(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK));
+    if (uffd < 0) {
+        LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
+        return;
+    }
+
+    struct uffdio_api api = {.api = UFFD_API};
+    ioctl(uffd, UFFDIO_API, &api);
+
+    struct uffdio_register reg = {
+        .range = {.start = (uintptr_t)base, .len = size},
+        .mode = UFFDIO_REGISTER_MODE_MISSING,
+    };
+
+    if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0) {
+        LOG_ERROR(Render_Vulkan, "Failed to register memory range with userfaultfd");
+        close(uffd);
+        uffd = -1;
+        return;
+    }
+
+    running = true;
+    fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
+}
+
+void FaultManagedAllocator::Touch(size_t addr) {
+    lru.remove(addr);
+    lru.push_front(addr);
+    dirty_set.insert(addr);
+}
+
+void FaultManagedAllocator::EnforceLimit() {
+    while (lru.size() > MaxPages) {
+        size_t evict = lru.back();
+        lru.pop_back();
+
+        auto it = page_map.find(evict);
+        if (it != page_map.end()) {
+            if (dirty_set.count(evict)) {
+                // Compress and store dirty page before evicting
+                std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
+                compressed_store[evict] = std::move(compressed);
+                dirty_set.erase(evict);
+            }
+
+            munmap(it->second, PageSize);
+            page_map.erase(it);
+        }
+    }
+}
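+
+// Review note (comment added during editing): EnforceLimit() and GetOrAlloc()
+// form an eviction/refill pair. Evicted dirty pages are copied into
+// compressed_store and restored verbatim on the next fault; despite the name,
+// no actual compression is applied yet, so each evicted dirty page costs a
+// full page of host memory.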
+
+void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
+    std::lock_guard guard(lock);
+
+    if (page_map.count(addr)) {
+        Touch(addr);
+        return page_map[addr];
+    }
+
+    void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+    if (mem == MAP_FAILED) {
+        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
+        return nullptr;
+    }
+
+    if (compressed_store.count(addr)) {
+        // Decompress stored page data
+        std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
+        compressed_store.erase(addr);
+    } else {
+        std::memset(mem, 0, PageSize);
+    }
+
+    page_map[addr] = mem;
+    lru.push_front(addr);
+    dirty_set.insert(addr);
+    EnforceLimit();
+
+    return mem;
+}
+
+void FaultManagedAllocator::FaultThread() {
+    struct pollfd pfd = {uffd, POLLIN, 0};
+
+    while (running) {
+        if (poll(&pfd, 1, 10) > 0) {
+            struct uffd_msg msg;
+            const ssize_t nread = read(uffd, &msg, sizeof(msg));
+            if (nread != static_cast<ssize_t>(sizeof(msg))) {
+                continue;
+            }
+
+            if (msg.event == UFFD_EVENT_PAGEFAULT) {
+                size_t addr = msg.arg.pagefault.address & ~(PageSize - 1);
+                void* page = GetOrAlloc(addr);
+
+                if (page) {
+                    struct uffdio_copy copy = {
+                        .dst = (uintptr_t)addr,
+                        .src = (uintptr_t)page,
+                        .len = PageSize,
+                        .mode = 0,
+                    };
+
+                    ioctl(uffd, UFFDIO_COPY, &copy);
+                }
+            }
+        }
+    }
+}
+
+void* FaultManagedAllocator::Translate(size_t addr) {
+    std::lock_guard guard(lock);
+
+    size_t base = addr & ~(PageSize - 1);
+    if (!page_map.count(base)) {
+        return nullptr;
+    }
+
+    Touch(base);
+    return (u8*)page_map[base] + (addr % PageSize);
+}
+
+void FaultManagedAllocator::SaveSnapshot(const std::string& path) {
+    std::lock_guard guard(lock);
+
+    std::ofstream out(path, std::ios::binary);
+    if (!out) {
+        LOG_ERROR(Render_Vulkan, "Failed to open snapshot file for writing: {}", path);
+        return;
+    }
+
+    for (auto& [addr, mem] : page_map) {
+        out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
+        out.write(reinterpret_cast<const char*>(mem), PageSize);
+    }
+
+    LOG_INFO(Render_Vulkan, "Saved memory snapshot to {}", path);
+}
+
+void FaultManagedAllocator::SaveDifferentialSnapshot(const std::string& path) {
+    std::lock_guard guard(lock);
+
+    std::ofstream out(path, std::ios::binary);
+    if (!out) {
+        LOG_ERROR(Render_Vulkan, "Failed to open diff snapshot file for writing: {}", path);
+        return;
+    }
+
+    size_t dirty_count = 0;
+    for (const auto& addr : dirty_set) {
+        if (page_map.count(addr)) {
+            out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
+            out.write(reinterpret_cast<const char*>(page_map[addr]), PageSize);
+            dirty_count++;
+        }
+    }
+
+    LOG_INFO(Render_Vulkan, "Saved differential snapshot to {} ({} dirty pages)", path,
+             dirty_count);
+}
+
+void FaultManagedAllocator::ClearDirtySet() {
+    std::lock_guard guard(lock);
+    dirty_set.clear();
+    LOG_DEBUG(Render_Vulkan, "Cleared dirty page tracking");
+}
+
+FaultManagedAllocator::~FaultManagedAllocator() {
+    running = false;
+
+    if (fault_handler.joinable()) {
+        fault_handler.join();
+    }
+
+    for (auto& [addr, mem] : page_map) {
+        munmap(mem, PageSize);
+    }
+
+    if (uffd != -1) {
+        close(uffd);
+    }
+}
+#endif // defined(__linux__) || defined(__ANDROID__)
+
+HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator,
+                           size_t reuse_history)
+    : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) {}
+
+HybridMemory::~HybridMemory() = default;
+
+void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
+    fmaa.Initialize(base, size);
+    LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}", base,
+             size);
+#else
+    LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
+#endif
+}
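+
+// Review note (comment added during editing): TranslateAddress() only
+// resolves pages the fault thread has already materialized; an untouched
+// address returns nullptr rather than faulting the page in, so callers must
+// treat nullptr as "not resident".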
+
+void* HybridMemory::TranslateAddress(size_t addr) {
+#if defined(__linux__) || defined(__ANDROID__)
+    return fmaa.Translate(addr);
+#else
+    return nullptr;
+#endif
+}
+
+ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
+                                                MemoryUsage memory_type) {
+    ComputeBuffer buffer;
+    buffer.size = size;
+
+    VkBufferCreateInfo buffer_ci = {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = size,
+        .usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    };
+
+    // Using CreateBuffer directly handles memory allocation internally
+    buffer.buffer = memory_allocator.CreateBuffer(buffer_ci, memory_type);
+
+    LOG_DEBUG(Render_Vulkan, "Created compute buffer: size={}, usage={:x}", size, usage);
+
+    return buffer;
+}
+
+void HybridMemory::SaveSnapshot(const std::string& path) {
+#if defined(__linux__) || defined(__ANDROID__)
+    fmaa.SaveSnapshot(path);
+#else
+    LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform");
+#endif
+}
+
+void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
+#if defined(__linux__) || defined(__ANDROID__)
+    fmaa.SaveDifferentialSnapshot(path);
+#else
+    LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform");
+#endif
+}
+
+void HybridMemory::ResetDirtyTracking() {
+#if defined(__linux__) || defined(__ANDROID__)
+    fmaa.ClearDirtySet();
+#endif
+}
+
+} // namespace Vulkan
\ No newline at end of file
diff --git a/src/video_core/vulkan_common/hybrid_memory.h b/src/video_core/vulkan_common/hybrid_memory.h
new file mode 100644
index 0000000000..c258d4f213
--- /dev/null
+++ b/src/video_core/vulkan_common/hybrid_memory.h
@@ -0,0 +1,103 @@
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <list>
+#include <map>
+#include <mutex>
+#include <set>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+struct ComputeBuffer {
+    vk::Buffer buffer{};
+    VkDeviceSize size = 0;
+};
+
+class PredictiveReuseManager {
+public:
+    explicit PredictiveReuseManager(size_t history_size) : max_history{history_size} {}
+
+    void RecordUsage(u64 address, u64 size, bool write_access);
+    bool IsHotRegion(u64 address, u64 size) const;
+    void EvictRegion(u64 address, u64 size);
+    void ClearHistory();
+
+private:
+    struct MemoryAccess {
+        u64 address;
+        u64 size;
+        bool write_access;
+        u64 timestamp;
+    };
+
+    std::vector<MemoryAccess> access_history;
+    const size_t max_history;
+    u64 current_timestamp{0};
+    mutable std::mutex mutex;
+};
+
+#if defined(__linux__) || defined(__ANDROID__)
+class FaultManagedAllocator {
+public:
+    static constexpr size_t PageSize = 0x1000;
+    static constexpr size_t MaxPages = 16384;
+
+    void Initialize(void* base, size_t size);
+    void* Translate(size_t addr);
+    void SaveSnapshot(const std::string& path);
+    void SaveDifferentialSnapshot(const std::string& path);
+    void ClearDirtySet();
+    ~FaultManagedAllocator();
+
+private:
+    std::map<size_t, void*> page_map;
+    std::list<size_t> lru;
+    std::set<size_t> dirty_set;
+    std::unordered_map<size_t, std::vector<u8>> compressed_store;
+    std::mutex lock;
+    int uffd = -1;
+    std::atomic<bool> running{false};
+    std::thread fault_handler;
+
+    void Touch(size_t addr);
+    void EnforceLimit();
+    void* GetOrAlloc(size_t addr);
+    void FaultThread();
+};
+#endif
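+
+// Owns the predictive reuse tracker and, on Linux/Android, the fault-managed
+// allocator; compute buffers are allocated through the shared MemoryAllocator.
+// (Review note: this comment was added during editing for clarity.)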
+
+class HybridMemory {
+public:
+    explicit HybridMemory(const Device& device, MemoryAllocator& allocator,
+                          size_t reuse_history = 32);
+    ~HybridMemory();
+
+    void InitializeGuestMemory(void* base, size_t size);
+    void* TranslateAddress(size_t addr);
+
+    ComputeBuffer CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
+                                      MemoryUsage memory_type);
+
+    void SaveSnapshot(const std::string& path);
+    void SaveDifferentialSnapshot(const std::string& path);
+    void ResetDirtyTracking();
+
+private:
+    const Device& device;
+    MemoryAllocator& memory_allocator;
+    PredictiveReuseManager reuse_manager;
+
+#if defined(__linux__) || defined(__ANDROID__)
+    FaultManagedAllocator fmaa;
+#endif
+};
+
+} // namespace Vulkan
\ No newline at end of file
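
Review addendum: a minimal usage sketch (not part of the diff) showing how the
new API is meant to be exercised, mirroring what RendererVulkan does behind the
use_gpu_memory_manager setting. DemoHybridMemory and its parameters are
hypothetical stand-ins; only HybridMemory, ComputeBuffer, MemoryUsage, and the
included headers come from the change itself.

#include <cstdlib>

#include "video_core/vulkan_common/hybrid_memory.h"

void DemoHybridMemory(const Vulkan::Device& device, Vulkan::MemoryAllocator& allocator) {
    Vulkan::HybridMemory memory(device, allocator);

#if defined(__linux__) || defined(__ANDROID__)
    // 64 MiB of fault-managed guest memory; pages materialize lazily in the
    // userfaultfd thread the first time they are touched.
    constexpr size_t guest_size = 64 * 1024 * 1024;
    if (void* base = std::aligned_alloc(4096, guest_size)) {
        memory.InitializeGuestMemory(base, guest_size);

        // Resolves only pages that have already faulted in; untouched
        // addresses yield nullptr. (Lifetime of base is elided in this sketch.)
        void* resident = memory.TranslateAddress(reinterpret_cast<size_t>(base));
        (void)resident;
    }
#endif

    // Device-local storage buffer; allocation happens inside
    // MemoryAllocator::CreateBuffer via CreateComputeBuffer.
    Vulkan::ComputeBuffer buffer = memory.CreateComputeBuffer(
        1 * 1024 * 1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, Vulkan::MemoryUsage::DeviceLocal);

    // Dump resident pages to disk (logs an error on unsupported platforms).
    memory.SaveSnapshot("snapshots/example.bin");
}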