diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
index ec2759edd5..0a40daa486 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
@@ -13,6 +13,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
CORE_SYNC_CORE_SPEED("sync_core_speed"),
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
USE_DOCKED_MODE("use_docked_mode"),
+ USE_AUTO_STUB("use_auto_stub"),
RENDERER_USE_DISK_SHADER_CACHE("use_disk_shader_cache"),
RENDERER_FORCE_MAX_CLOCK("force_max_clock"),
RENDERER_ASYNCHRONOUS_SHADERS("use_asynchronous_shaders"),
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
index 7ccc6500c2..52f63ad3d1 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
@@ -273,7 +273,13 @@ abstract class SettingsItem(
descriptionId = R.string.use_docked_mode_description
)
)
-
+ put(
+ SwitchSetting(
+ BooleanSetting.USE_AUTO_STUB,
+ titleId = R.string.use_auto_stub,
+ descriptionId = R.string.use_auto_stub_description
+ )
+ )
put(
SwitchSetting(
syncCoreSpeedSetting,
@@ -354,6 +360,7 @@ abstract class SettingsItem(
valuesId = R.array.vramUsageMethodValues
)
)
+
put(
SingleChoiceSetting(
IntSetting.RENDERER_RESOLUTION,
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
index 4a33b176d7..8f136790a6 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
@@ -261,6 +261,7 @@ class SettingsFragmentPresenter(
add(BooleanSetting.RENDERER_USE_SPEED_LIMIT.key)
add(ShortSetting.RENDERER_SPEED_LIMIT.key)
add(BooleanSetting.USE_DOCKED_MODE.key)
+ add(BooleanSetting.USE_AUTO_STUB.key)
add(IntSetting.REGION_INDEX.key)
add(IntSetting.LANGUAGE_INDEX.key)
add(BooleanSetting.USE_CUSTOM_RTC.key)
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index 28f18e1596..2987ea91fc 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -256,6 +256,8 @@
Device name
Docked Mode
Increases resolution, decreasing performance. Handheld Mode is used when disabled, lowering resolution and increasing performance.
+ Use Auto Stub
+ Automatically stub missing services and functions. This may improve compatibility but can cause crashes and stability issues.
Emulated region
Emulated language
Select RTC date
diff --git a/src/common/settings.h b/src/common/settings.h
index ec627b744d..7316b27e2d 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -622,7 +622,7 @@ struct Values {
linkage, false, "extended_logging", Category::Debugging, Specialization::Default, false};
Setting use_debug_asserts{linkage, false, "use_debug_asserts", Category::Debugging};
Setting use_auto_stub{
- linkage, false, "use_auto_stub", Category::Debugging, Specialization::Default, false};
+ linkage, false, "use_auto_stub", Category::Debugging};
Setting enable_all_controllers{linkage, false, "enable_all_controllers",
Category::Debugging};
Setting perform_vulkan_check{linkage, true, "perform_vulkan_check", Category::Debugging};
@@ -636,7 +636,7 @@ struct Values {
Category::Network};
// WebService
- Setting web_api_url{linkage, "https://api.ynet-fun.xyz", "web_api_url",
+ Setting web_api_url{linkage, "api.ynet-fun.xyz", "web_api_url",
Category::WebService};
Setting yuzu_username{linkage, std::string(), "yuzu_username",
Category::WebService};
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index ab1358a129..df3e540dc2 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -84,6 +84,7 @@ private:
std::array m_entropy{};
bool m_is_signaled{};
bool m_is_initialized{};
+ u32 m_pointer_buffer_size = 0x8000; // Default pointer buffer size (can be game-specific later)
bool m_is_application{};
bool m_is_default_application_system_resource{};
bool m_is_hbl{};
@@ -239,6 +240,14 @@ public:
m_is_suspended = suspended;
}
+ // Returns the pointer buffer size currently stored for this process.
+ u32 GetPointerBufferSize() const {
+ return m_pointer_buffer_size;
+ }
+
+ // Replaces the stored pointer buffer size. The value is stored as given; no range check
+ // is performed here.
+ void SetPointerBufferSize(u32 size) {
+ m_pointer_buffer_size = size;
+ }
+
Result Terminate();
bool IsTerminated() const {
diff --git a/src/core/hle/service/am/service/common_state_getter.cpp b/src/core/hle/service/am/service/common_state_getter.cpp
index c445e5e776..c179102f76 100644
--- a/src/core/hle/service/am/service/common_state_getter.cpp
+++ b/src/core/hle/service/am/service/common_state_getter.cpp
@@ -38,7 +38,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system_, std::shared_ptr, "GetReaderLockAccessorEx"},
{32, D<&ICommonStateGetter::GetWriterLockAccessorEx>, "GetWriterLockAccessorEx"},
- {40, D<&ICommonStateGetter::GetCradleFwVersion>, "GetCradleFwVersion"},
+ {40, nullptr, "GetCradleFwVersion"},
{50, D<&ICommonStateGetter::IsVrModeEnabled>, "IsVrModeEnabled"},
{51, D<&ICommonStateGetter::SetVrModeEnabled>, "SetVrModeEnabled"},
{52, D<&ICommonStateGetter::SetLcdBacklighOffEnabled>, "SetLcdBacklighOffEnabled"},
@@ -172,17 +172,6 @@ Result ICommonStateGetter::GetBootMode(Out out_boot_mode) {
R_SUCCEED();
}
-Result ICommonStateGetter::GetCradleFwVersion(OutArray out_version) {
- LOG_DEBUG(Service_AM, "(STUBBED) called");
-
- out_version[0] = 0;
- out_version[1] = 0;
- out_version[2] = 0;
- out_version[3] = 0;
-
- R_SUCCEED();
-}
-
Result ICommonStateGetter::IsVrModeEnabled(Out out_is_vr_mode_enabled) {
LOG_DEBUG(Service_AM, "called");
diff --git a/src/core/hle/service/am/service/common_state_getter.h b/src/core/hle/service/am/service/common_state_getter.h
index a326e11053..0cafbfe176 100644
--- a/src/core/hle/service/am/service/common_state_getter.h
+++ b/src/core/hle/service/am/service/common_state_getter.h
@@ -39,7 +39,6 @@ private:
Result GetHdcpAuthenticationStateChangeEvent(OutCopyHandle out_event);
Result GetOperationMode(Out out_operation_mode);
Result GetPerformanceMode(Out out_performance_mode);
- Result GetCradleFwVersion(OutArray out_version);
Result GetBootMode(Out out_boot_mode);
Result IsVrModeEnabled(Out out_is_vr_mode_enabled);
Result SetVrModeEnabled(bool is_vr_mode_enabled);
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
index 7f0fb91d04..9e25eae4d4 100644
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -68,13 +68,46 @@ void Controller::CloneCurrentObjectEx(HLERequestContext& ctx) {
}
void Controller::QueryPointerBufferSize(HLERequestContext& ctx) {
- LOG_WARNING(Service, "(STUBBED) called");
+ // Replies with the pointer buffer size stored on the current process instead of the
+ // previously hard-coded 0x8000.
+ LOG_DEBUG(Service, "called");
+
+ auto* process = Kernel::GetCurrentProcessPointer(kernel);
+ ASSERT(process != nullptr);
+
+ // The process stores the size as a u32. Values above the limit named in the warning
+ // below (u16 max) are clamped before being pushed into the response.
+ u32 buffer_size = process->GetPointerBufferSize();
+ if (buffer_size > std::numeric_limits::max()) {
+ LOG_WARNING(Service, "Pointer buffer size exceeds u16 max, clamping");
+ buffer_size = std::numeric_limits::max();
+ }
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
- rb.Push(0x8000);
+ rb.Push(static_cast(buffer_size));
}
+// Handler registered as control command 5: reads a size from the request, clamps it, and
+// stores it on the current process via KProcess::SetPointerBufferSize.
+// NOTE(review): confirm that the real IPC control interface defines this command; the value
+// comes straight from the request and only an upper bound is enforced (0 is accepted).
+void Controller::SetPointerBufferSize(HLERequestContext& ctx) {
+ LOG_DEBUG(Service, "called");
+
+ auto* process = Kernel::GetCurrentProcessPointer(kernel);
+ ASSERT(process != nullptr);
+
+ IPC::RequestParser rp{ctx};
+
+ // The requested size is the only value popped from the request.
+ u32 requested_size = rp.PopRaw();
+
+ // Keep the stored value within what QueryPointerBufferSize can report without clamping.
+ if (requested_size > std::numeric_limits::max()) {
+ LOG_WARNING(Service, "Requested pointer buffer size too large, clamping to 0xFFFF");
+ requested_size = std::numeric_limits::max();
+ }
+
+ process->SetPointerBufferSize(requested_size);
+
+ LOG_INFO(Service, "Pointer buffer size dynamically updated to {:#x} bytes by process", requested_size);
+
+ // Result-only response: nothing besides the result code is pushed.
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ResultSuccess);
+}
+
+
// https://switchbrew.org/wiki/IPC_Marshalling
Controller::Controller(Core::System& system_) : ServiceFramework{system_, "IpcController"} {
static const FunctionInfo functions[] = {
@@ -83,6 +116,7 @@ Controller::Controller(Core::System& system_) : ServiceFramework{system_, "IpcCo
{2, &Controller::CloneCurrentObject, "CloneCurrentObject"},
{3, &Controller::QueryPointerBufferSize, "QueryPointerBufferSize"},
{4, &Controller::CloneCurrentObjectEx, "CloneCurrentObjectEx"},
+ {5, &Controller::SetPointerBufferSize, "SetPointerBufferSize"},
};
RegisterHandlers(functions);
}
diff --git a/src/core/hle/service/sm/sm_controller.h b/src/core/hle/service/sm/sm_controller.h
index 4e748b36d9..f7e529a269 100644
--- a/src/core/hle/service/sm/sm_controller.h
+++ b/src/core/hle/service/sm/sm_controller.h
@@ -21,6 +21,7 @@ private:
void CloneCurrentObject(HLERequestContext& ctx);
void CloneCurrentObjectEx(HLERequestContext& ctx);
void QueryPointerBufferSize(HLERequestContext& ctx);
+ void SetPointerBufferSize(HLERequestContext& ctx);
};
} // namespace Service::SM
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
index de27ec49e2..4a87ab53e7 100644
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -15,9 +15,20 @@
#include "core/loader/deconstructed_rom_directory.h"
#include "core/loader/nca.h"
#include "mbedtls/sha256.h"
+#include "common/literals.h"
namespace Loader {
+// Maps a title's heap size to the pointer buffer size stored on its process:
+// more than 1 GiB -> 0x10000, more than 512 MiB -> 0xC000, anything else -> 0x8000.
+// NOTE(review): Controller::QueryPointerBufferSize clamps its reply to the u16 maximum, so the
+// 0x10000 tier appears to reach the guest as a clamped value (with a warning) - confirm intent.
+static u32 CalculatePointerBufferSize(size_t heap_size) {
+    constexpr size_t half_gib = size_t{1} << 29;
+    constexpr size_t one_gib = size_t{1} << 30;
+
+    if (heap_size > one_gib) {
+        return 0x10000;
+    }
+    if (heap_size > half_gib) {
+        return 0xC000;
+    }
+    return 0x8000;
+}
+
AppLoader_NCA::AppLoader_NCA(FileSys::VirtualFile file_)
: AppLoader(std::move(file_)), nca(std::make_unique(file)) {}
@@ -52,8 +63,6 @@ AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::KProcess& process, Core::S
if (exefs == nullptr) {
LOG_INFO(Loader, "No ExeFS found in NCA, looking for ExeFS from update");
- // This NCA may be a sparse base of an installed title.
- // Try to fetch the ExeFS from the installed update.
const auto& installed = system.GetContentProvider();
const auto update_nca = installed.GetEntry(FileSys::GetUpdateTitleID(nca->GetTitleId()),
FileSys::ContentRecordType::Program);
@@ -69,11 +78,37 @@ AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::KProcess& process, Core::S
directory_loader = std::make_unique(exefs, true);
+ // Read heap size from main.npdm in ExeFS
+ u64 heap_size = 0;
+
+ if (exefs) {
+ const auto npdm_file = exefs->GetFile("main.npdm");
+ if (npdm_file) {
+ auto npdm_data = npdm_file->ReadAllBytes();
+ if (npdm_data.size() >= 0x30) {
+ heap_size = *reinterpret_cast(&npdm_data[0x28]);
+ LOG_INFO(Loader, "Read heap size {:#x} bytes from main.npdm", heap_size);
+ } else {
+ LOG_WARNING(Loader, "main.npdm too small to read heap size!");
+ }
+ } else {
+ LOG_WARNING(Loader, "No main.npdm found in ExeFS!");
+ }
+ }
+
+ // Set pointer buffer size based on heap size
+ process.SetPointerBufferSize(CalculatePointerBufferSize(heap_size));
+
+ // Load modules
const auto load_result = directory_loader->Load(process, system);
if (load_result.first != ResultStatus::Success) {
return load_result;
}
+ LOG_INFO(Loader, "Set pointer buffer size to {:#x} bytes for ProgramID {:#018x} (Heap size: {:#x})",
+ process.GetPointerBufferSize(), nca->GetTitleId(), heap_size);
+
+ // Register the process in the file system controller
system.GetFileSystemController().RegisterProcess(
process.GetProcessId(), nca->GetTitleId(),
std::make_shared(*this, system.GetContentProvider(),
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 6f7703fced..b033858bf8 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -6,6 +6,8 @@
#include
#include
#include
+#include
+#include
#include "common/assert.h"
#include "common/atomic_ops.h"
@@ -32,6 +34,105 @@ namespace Core::Memory {
namespace {
+/**
+ * Copies `size` bytes from `src` to `dst`. The two ranges must not overlap.
+ *
+ * Sizes that are common for guest accesses are dispatched to std::memcpy calls with a
+ * compile-time constant length, which compilers lower to plain loads and stores. Unlike
+ * storing through a casted u16/u32/u64 pointer, this stays well defined for any alignment
+ * of `dst` and `src` and does not violate strict aliasing.
+ *
+ * @param dst  Destination buffer with room for at least `size` bytes.
+ * @param src  Source buffer holding at least `size` bytes.
+ * @param size Number of bytes to copy; 0 is a no-op.
+ */
+inline void FastMemcpy(void* dst, const void* src, std::size_t size) {
+    switch (size) {
+    case 1:
+        std::memcpy(dst, src, 1);
+        break;
+    case 2:
+        std::memcpy(dst, src, 2);
+        break;
+    case 4:
+        std::memcpy(dst, src, 4);
+        break;
+    case 8:
+        std::memcpy(dst, src, 8);
+        break;
+    case 16:
+        std::memcpy(dst, src, 16);
+        break;
+    case 32:
+        std::memcpy(dst, src, 32);
+        break;
+    case 64:
+        std::memcpy(dst, src, 64);
+        break;
+    default:
+        // Any other length goes through the library routine with a runtime size.
+        std::memcpy(dst, src, size);
+        break;
+    }
+}
+
+/**
+ * Fills `size` bytes at `dst` with `value`, with exactly the semantics of std::memset:
+ * `value` is converted to unsigned char and that byte is written to every position.
+ *
+ * Common small sizes are dispatched to std::memset calls with a compile-time constant
+ * length so the compiler can emit plain stores. Writing `value` once through a casted
+ * u16/u32/u64 pointer would only set the low byte for non-zero fills and would be
+ * undefined for misaligned destinations.
+ *
+ * @param dst   Destination buffer with room for at least `size` bytes.
+ * @param value Fill value; only its low 8 bits are used.
+ * @param size  Number of bytes to fill; 0 is a no-op.
+ */
+inline void FastMemset(void* dst, int value, std::size_t size) {
+    switch (size) {
+    case 1:
+        std::memset(dst, value, 1);
+        break;
+    case 2:
+        std::memset(dst, value, 2);
+        break;
+    case 4:
+        std::memset(dst, value, 4);
+        break;
+    case 8:
+        std::memset(dst, value, 8);
+        break;
+    case 16:
+        std::memset(dst, value, 16);
+        break;
+    default:
+        // Any other length goes through the library routine with a runtime size.
+        std::memset(dst, value, size);
+        break;
+    }
+}
+
bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessAddress addr,
const std::size_t size) {
const Common::ProcessAddress max_addr = 1ULL << table.GetAddressSpaceBits();
@@ -44,7 +145,11 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
// from outside classes. This also allows modification to the internals of the memory
// subsystem without needing to rebuild all files that make use of the memory interface.
struct Memory::Impl {
- explicit Impl(Core::System& system_) : system{system_} {}
+    // Picks the number of threads used by the parallel block copies: the reported hardware
+    // concurrency (which may be 0 when it cannot be determined) limited to the range [2, 8].
+    explicit Impl(Core::System& system_)
+        : system{system_}, thread_count{std::clamp(std::thread::hardware_concurrency(), 2u, 8u)} {}
void SetCurrentPageTable(Kernel::KProcess& process) {
current_page_table = &process.GetPageTable().GetImpl();
@@ -308,26 +413,70 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
GetInteger(current_vaddr), GetInteger(src_addr), size);
- std::memset(dest_buffer, 0, copy_amount);
+ FastMemset(dest_buffer, 0, copy_amount);
},
[&](const std::size_t copy_amount, const u8* const src_ptr) {
- std::memcpy(dest_buffer, src_ptr, copy_amount);
+ FastMemcpy(dest_buffer, src_ptr, copy_amount);
},
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
const u8* const host_ptr) {
if constexpr (!UNSAFE) {
HandleRasterizerDownload(GetInteger(current_vaddr), copy_amount);
}
- std::memcpy(dest_buffer, host_ptr, copy_amount);
+ FastMemcpy(dest_buffer, host_ptr, copy_amount);
},
[&](const std::size_t copy_amount) {
dest_buffer = static_cast(dest_buffer) + copy_amount;
});
}
+    /**
+     * Reads a large block by splitting it into contiguous chunks, one worker thread per chunk.
+     *
+     * @param src_addr    Guest virtual address to start reading from.
+     * @param dest_buffer Host buffer receiving `size` bytes.
+     * @param size        Number of bytes to read.
+     * @returns True only if ReadBlockImpl returned true for every chunk.
+     *
+     * NOTE(review): threads are created and joined on every call and the chunks run
+     * ReadBlockImpl concurrently; confirm that path (including the rasterizer download hook)
+     * may be entered from several threads at once and that this beats a single copy.
+     */
+    bool ReadBlockParallel(const Common::ProcessAddress src_addr, void* dest_buffer,
+                           const std::size_t size) {
+        // Ceiling division so that thread_count chunks always cover the whole range.
+        const std::size_t chunk_size = (size + thread_count - 1) / thread_count;
+
+        std::vector<std::thread> workers;
+        workers.reserve(thread_count);
+
+        // One byte per worker. std::vector<bool> packs its elements into shared words, so
+        // concurrent writes to neighbouring elements would be a data race.
+        std::vector<u8> chunk_ok(thread_count, 1);
+
+        for (unsigned int i = 0; i < thread_count; ++i) {
+            const std::size_t offset = i * chunk_size;
+            if (offset >= size) {
+                // Fewer chunks than threads; unused slots keep their "success" default.
+                break;
+            }
+
+            const std::size_t length = std::min(chunk_size, size - offset);
+            const Common::ProcessAddress chunk_addr = src_addr + offset;
+            void* const chunk_dest = static_cast<u8*>(dest_buffer) + offset;
+
+            workers.emplace_back([this, i, chunk_addr, chunk_dest, length, &chunk_ok] {
+                chunk_ok[i] = ReadBlockImpl<false>(chunk_addr, chunk_dest, length) ? 1 : 0;
+            });
+        }
+
+        // join() synchronizes with each worker's completion, so its chunk_ok write is visible.
+        for (auto& worker : workers) {
+            worker.join();
+        }
+
+        return std::all_of(chunk_ok.begin(), chunk_ok.end(), [](u8 ok) { return ok != 0; });
+    }
+
bool ReadBlock(const Common::ProcessAddress src_addr, void* dest_buffer,
const std::size_t size) {
- return ReadBlockImpl(src_addr, dest_buffer, size);
+ // Dispatches on size: reads below PARALLEL_THRESHOLD take the single-threaded
+ // ReadBlockImpl path, everything else is split across threads by ReadBlockParallel.
+ // For small reads, use the regular implementation
+ if (size < PARALLEL_THRESHOLD) {
+ return ReadBlockImpl(src_addr, dest_buffer, size);
+ }
+
+ // For large reads, use parallel implementation
+ return ReadBlockParallel(src_addr, dest_buffer, size);
}
bool ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_buffer,
@@ -363,23 +512,67 @@ struct Memory::Impl {
GetInteger(current_vaddr), GetInteger(dest_addr), size);
},
[&](const std::size_t copy_amount, u8* const dest_ptr) {
- std::memcpy(dest_ptr, src_buffer, copy_amount);
+ FastMemcpy(dest_ptr, src_buffer, copy_amount);
},
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
u8* const host_ptr) {
if constexpr (!UNSAFE) {
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
}
- std::memcpy(host_ptr, src_buffer, copy_amount);
+ FastMemcpy(host_ptr, src_buffer, copy_amount);
},
[&](const std::size_t copy_amount) {
src_buffer = static_cast(src_buffer) + copy_amount;
});
}
+    /**
+     * Writes a large block by splitting it into contiguous chunks, one worker thread per chunk.
+     *
+     * @param dest_addr  Guest virtual address to start writing to.
+     * @param src_buffer Host buffer providing `size` bytes.
+     * @param size       Number of bytes to write.
+     * @returns True only if WriteBlockImpl returned true for every chunk.
+     *
+     * NOTE(review): threads are created and joined on every call and the chunks run
+     * WriteBlockImpl concurrently; confirm that path (including the rasterizer write hook)
+     * may be entered from several threads at once and that this beats a single copy.
+     */
+    bool WriteBlockParallel(const Common::ProcessAddress dest_addr, const void* src_buffer,
+                            const std::size_t size) {
+        // Ceiling division so that thread_count chunks always cover the whole range.
+        const std::size_t chunk_size = (size + thread_count - 1) / thread_count;
+
+        std::vector<std::thread> workers;
+        workers.reserve(thread_count);
+
+        // One byte per worker. std::vector<bool> packs its elements into shared words, so
+        // concurrent writes to neighbouring elements would be a data race.
+        std::vector<u8> chunk_ok(thread_count, 1);
+
+        for (unsigned int i = 0; i < thread_count; ++i) {
+            const std::size_t offset = i * chunk_size;
+            if (offset >= size) {
+                // Fewer chunks than threads; unused slots keep their "success" default.
+                break;
+            }
+
+            const std::size_t length = std::min(chunk_size, size - offset);
+            const Common::ProcessAddress chunk_addr = dest_addr + offset;
+            const void* const chunk_src = static_cast<const u8*>(src_buffer) + offset;
+
+            workers.emplace_back([this, i, chunk_addr, chunk_src, length, &chunk_ok] {
+                chunk_ok[i] = WriteBlockImpl<false>(chunk_addr, chunk_src, length) ? 1 : 0;
+            });
+        }
+
+        // join() synchronizes with each worker's completion, so its chunk_ok write is visible.
+        for (auto& worker : workers) {
+            worker.join();
+        }
+
+        return std::all_of(chunk_ok.begin(), chunk_ok.end(), [](u8 ok) { return ok != 0; });
+    }
+
bool WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
const std::size_t size) {
- return WriteBlockImpl(dest_addr, src_buffer, size);
+ // Dispatches on size: writes below PARALLEL_THRESHOLD take the single-threaded
+ // WriteBlockImpl path, everything else is split across threads by WriteBlockParallel.
+ // For small writes, use the regular implementation
+ if (size < PARALLEL_THRESHOLD) {
+ return WriteBlockImpl(dest_addr, src_buffer, size);
+ }
+
+ // For large writes, use parallel implementation
+ return WriteBlockParallel(dest_addr, src_buffer, size);
}
bool WriteBlockUnsafe(const Common::ProcessAddress dest_addr, const void* src_buffer,
@@ -397,12 +590,12 @@ struct Memory::Impl {
GetInteger(current_vaddr), GetInteger(dest_addr), size);
},
[](const std::size_t copy_amount, u8* const dest_ptr) {
- std::memset(dest_ptr, 0, copy_amount);
+ FastMemset(dest_ptr, 0, copy_amount);
},
[&](const Common::ProcessAddress current_vaddr, const std::size_t copy_amount,
u8* const host_ptr) {
HandleRasterizerWrite(GetInteger(current_vaddr), copy_amount);
- std::memset(host_ptr, 0, copy_amount);
+ FastMemset(host_ptr, 0, copy_amount);
},
[](const std::size_t copy_amount) {});
}
@@ -733,16 +926,71 @@ struct Memory::Impl {
*/
template
T Read(Common::ProcessAddress vaddr) {
+ // Every access width shares this one path: GetPointerImpl (and with it the unmapped and
+ // rasterizer callbacks) runs once per read, and the memcpy below has a compile-time
+ // constant length, so no value is loaded through a casted, possibly misaligned pointer.
+ // The result stays 0 when no host pointer is available.
+ const u64 addr = GetInteger(vaddr);
T result = 0;
const u8* const ptr = GetPointerImpl(
- GetInteger(vaddr),
- [vaddr]() {
- LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8,
- GetInteger(vaddr));
+ addr,
+ [addr]() {
+ LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr);
},
- [&]() { HandleRasterizerDownload(GetInteger(vaddr), sizeof(T)); });
+ [&]() { HandleRasterizerDownload(addr, sizeof(T)); });
if (ptr) {
std::memcpy(&result, ptr, sizeof(T));
}
return result;
}
@@ -758,15 +1006,78 @@ struct Memory::Impl {
*/
template
void Write(Common::ProcessAddress vaddr, const T data) {
+ // Every access width shares this one path: GetPointerImpl (and with it the unmapped and
+ // rasterizer callbacks) runs once per write, and the memcpy below has a compile-time
+ // constant length, so no value is stored through a casted, possibly misaligned pointer.
+ // Nothing is written when no host pointer is available.
+ const u64 addr = GetInteger(vaddr);
u8* const ptr = GetPointerImpl(
- GetInteger(vaddr),
- [vaddr, data]() {
+ addr,
+ [addr, data]() {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
- GetInteger(vaddr), static_cast(data));
+ addr, static_cast(data));
},
- [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
+ [&]() { HandleRasterizerWrite(addr, sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
}
@@ -878,6 +1189,12 @@ struct Memory::Impl {
Core::System& system;
Tegra::MaxwellDeviceMemoryManager* gpu_device_memory{};
Common::PageTable* current_page_table = nullptr;
+
+ // Number of threads to use for parallel memory operations
+ unsigned int thread_count = 2;
+
+ // Minimum size in bytes for which parallel processing is beneficial
+ static constexpr size_t PARALLEL_THRESHOLD = 64 * 1024; // 64 KB
std::array
rasterizer_read_areas{};
std::array rasterizer_write_areas{};
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index c4fe8235c7..ebc50290a9 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -106,11 +106,20 @@ RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& emu_window,
std::unique_ptr context_) try
: RendererBase(emu_window, std::move(context_)), device_memory(device_memory_), gpu(gpu_),
library(OpenLibrary(context.get())),
+ // Create raw Vulkan instance first
instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
- Settings::values.renderer_debug.GetValue())),
+ Settings::values.renderer_debug.GetValue())),
+ // Now create RAII wrappers for the resources in the correct order
+ managed_instance(MakeManagedInstance(instance, dld)),
+ // Create debug messenger if debug is enabled
debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance)
- : vk::DebugUtilsMessenger{}),
+ : vk::DebugUtilsMessenger{}),
+ managed_debug_messenger(Settings::values.renderer_debug
+ ? MakeManagedDebugUtilsMessenger(debug_messenger, instance, dld)
+ : ManagedDebugUtilsMessenger{}),
+ // Create surface
surface(CreateSurface(instance, render_window.GetWindowInfo())),
+ managed_surface(MakeManagedSurface(surface, instance, dld)),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 0a606d6fed..748981cc6f 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -21,6 +21,7 @@
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/hybrid_memory.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
+#include "video_core/vulkan_common/vulkan_raii.h"
namespace Core::Memory {
class Memory;
@@ -76,9 +77,18 @@ private:
std::shared_ptr library;
vk::InstanceDispatch dld;
+ // Keep original handles for compatibility with existing code
vk::Instance instance;
+ // RAII wrapper for instance
+ ManagedInstance managed_instance;
+
vk::DebugUtilsMessenger debug_messenger;
+ // RAII wrapper for debug messenger
+ ManagedDebugUtilsMessenger managed_debug_messenger;
+
vk::SurfaceKHR surface;
+ // RAII wrapper for surface
+ ManagedSurface managed_surface;
Device device;
MemoryAllocator memory_allocator;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 9426d31faf..dc7223227a 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -290,9 +290,9 @@ std::unordered_map GetFormatProperties(vk::Physica
void OverrideBcnFormats(std::unordered_map& format_properties) {
// These properties are extracted from Adreno driver 512.687.0
constexpr VkFormatFeatureFlags tiling_features{
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT};
@@ -388,11 +388,11 @@ void Device::RemoveExtensionFeature(bool& extension, Feature& feature,
// Unload extension.
this->RemoveExtension(extension, extension_name);
- // Save sType and pNext for chain.
+ // Save sType and pNext for chain.
VkStructureType sType = feature.sType;
void* pNext = feature.pNext;
- // Clear feature struct and restore chain.
+ // Clear feature struct and restore chain.
feature = {};
feature.sType = sType;
feature.pNext = pNext;
@@ -442,8 +442,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
SetupFamilies(surface);
const auto queue_cis = GetDeviceQueueCreateInfos();
- // GetSuitability has already configured the linked list of features for us.
- // Reuse it here.
+ // GetSuitability has already configured the linked list of features for us.
+ // Reuse it here.
const void* first_next = &features2;
VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv{};
@@ -484,16 +484,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan,
"Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color");
//RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
- //VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+ //VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
}
if (is_qualcomm) {
- must_emulate_scaled_formats = false;
-
- LOG_WARNING(Render_Vulkan, "Qualcomm drivers have broken VK_EXT_extended_dynamic_state");
- //RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
- //VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
-
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -521,14 +515,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
#endif
}
- if (is_arm) {
- must_emulate_scaled_formats = false;
-
- LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state");
- //RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
- //VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
- }
-
if (is_nvidia) {
const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
const auto arch = GetNvidiaArch();
@@ -553,8 +539,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan,
"RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state");
//RemoveExtensionFeature(extensions.extended_dynamic_state,
- //features.extended_dynamic_state,
- //VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+ //features.extended_dynamic_state,
+ //VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
}
}
if (extensions.extended_dynamic_state2 && is_radv) {
@@ -563,9 +549,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(
Render_Vulkan,
"RADV versions older than 22.3.1 have broken VK_EXT_extended_dynamic_state2");
- // RemoveExtensionFeature(extensions.extended_dynamic_state2,
- // features.extended_dynamic_state2,
- // VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+ // RemoveExtensionFeature(extensions.extended_dynamic_state2,
+ // features.extended_dynamic_state2,
+ // VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
}
if (extensions.extended_dynamic_state2 && is_qualcomm) {
@@ -576,8 +562,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan,
"Qualcomm Adreno 7xx drivers have broken VK_EXT_extended_dynamic_state2");
//RemoveExtensionFeature(extensions.extended_dynamic_state2,
- //features.extended_dynamic_state2,
- //VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+ //features.extended_dynamic_state2,
+ //VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
}
}
if (extensions.extended_dynamic_state3 && is_radv) {
@@ -610,9 +596,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
if (is_rdna2) {
LOG_WARNING(Render_Vulkan,
"RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
- // RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
- // features.vertex_input_dynamic_state,
- // VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ // RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
+ // features.vertex_input_dynamic_state,
+ // VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
}
if (extensions.vertex_input_dynamic_state && is_qualcomm) {
@@ -620,8 +606,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers have broken VK_EXT_vertex_input_dynamic_state");
//RemoveExtensionFeature(extensions.vertex_input_dynamic_state,
- // features.vertex_input_dynamic_state,
- // VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ // features.vertex_input_dynamic_state,
+ // VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
}
sets_per_pool = 64;
@@ -725,6 +711,37 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
dynamic_state3_enables = true;
}
+ if (Settings::values.dyna_state.GetValue() == 0) {
+ must_emulate_scaled_formats = true;
+ LOG_INFO(Render_Vulkan, "Dynamic state is disabled (dyna_state = 0), forcing scaled format emulation ON");
+
+ // Remove custom border color, extended dynamic state 1-2 and vertex input dynamic state
+ RemoveExtensionFeature(extensions.custom_border_color, features.custom_border_color,
+ VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+
+ RemoveExtensionFeature(extensions.extended_dynamic_state, features.extended_dynamic_state,
+ VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
+
+ RemoveExtensionFeature(extensions.extended_dynamic_state2, features.extended_dynamic_state2,
+ VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
+
+ RemoveExtensionFeature(extensions.vertex_input_dynamic_state, features.vertex_input_dynamic_state,
+ VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+
+ // Disable extended dynamic state 3 features
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+ features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+ features.extended_dynamic_state3.extendedDynamicState3DepthClampEnable = false;
+
+ dynamic_state3_blending = false;
+ dynamic_state3_enables = false;
+
+ LOG_INFO(Render_Vulkan, "Dynamic state extensions and features have been fully disabled");
+ } else {
+ must_emulate_scaled_formats = false;
+ LOG_INFO(Render_Vulkan, "Dynamic state is enabled (dyna_state = 1-3), disabling scaled format emulation");
+ }
+
logical = vk::Device::Create(physical, queue_cis, ExtensionListForVulkan(loaded_extensions),
first_next, dld);
@@ -782,7 +799,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
return alternative;
}
- // No alternatives found, panic
+ // No alternatives found, panic
LOG_ERROR(Render_Vulkan,
"Format={} with usage={} and type={} is not supported by the host hardware and "
"doesn't support any of the alternatives",
@@ -793,7 +810,7 @@ VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags
void Device::ReportLoss() const {
LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
- // Wait for the log to flush and for Nsight Aftermath to dump the results
+ // Wait for the log to flush and for Nsight Aftermath to dump the results
std::this_thread::sleep_for(std::chrono::seconds{15});
}
@@ -825,9 +842,9 @@ bool Device::ComputeIsOptimalAstcSupported() const {
return false;
}
const auto format_feature_usage{
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
- VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
+ VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
for (const auto format : astc_formats) {
const auto physical_format_properties{physical.GetFormatProperties(format)};
if ((physical_format_properties.optimalTilingFeatures & format_feature_usage) == 0) {
@@ -946,36 +963,36 @@ bool Device::GetSuitability(bool requires_swapchain) {
// Assume we will be suitable.
bool suitable = true;
- // Configure properties.
+ // Configure properties.
properties.properties = physical.GetProperties();
- // Set instance version.
+ // Set instance version.
instance_version = properties.properties.apiVersion;
- // Minimum of API version 1.1 is required. (This is well-supported.)
+ // Minimum of API version 1.1 is required. (This is well-supported.)
ASSERT(instance_version >= VK_API_VERSION_1_1);
- // Get available extensions.
+ // Get available extensions.
auto extension_properties = physical.EnumerateDeviceExtensionProperties();
- // Get the set of supported extensions.
+ // Get the set of supported extensions.
supported_extensions.clear();
for (const VkExtensionProperties& property : extension_properties) {
supported_extensions.insert(property.extensionName);
}
- // Generate list of extensions to load.
+ // Generate list of extensions to load.
loaded_extensions.clear();
#define EXTENSION(prefix, macro_name, var_name) \
if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
- loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
- extensions.var_name = true; \
+ loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
+ extensions.var_name = true; \
}
#define FEATURE_EXTENSION(prefix, struct_name, macro_name, var_name) \
if (supported_extensions.contains(VK_##prefix##_##macro_name##_EXTENSION_NAME)) { \
- loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
- extensions.var_name = true; \
+ loaded_extensions.insert(VK_##prefix##_##macro_name##_EXTENSION_NAME); \
+ extensions.var_name = true; \
}
if (instance_version < VK_API_VERSION_1_2) {
@@ -991,16 +1008,16 @@ bool Device::GetSuitability(bool requires_swapchain) {
#undef FEATURE_EXTENSION
#undef EXTENSION
- // Some extensions are mandatory. Check those.
+ // Some extensions are mandatory. Check those.
#define CHECK_EXTENSION(extension_name) \
if (!loaded_extensions.contains(extension_name)) { \
- LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \
- suitable = false; \
+ LOG_ERROR(Render_Vulkan, "Missing required extension {}", extension_name); \
+ suitable = false; \
}
#define LOG_EXTENSION(extension_name) \
if (!loaded_extensions.contains(extension_name)) { \
- LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \
+ LOG_INFO(Render_Vulkan, "Device doesn't support extension {}", extension_name); \
}
FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION);
@@ -1013,24 +1030,24 @@ bool Device::GetSuitability(bool requires_swapchain) {
#undef LOG_EXTENSION
#undef CHECK_EXTENSION
- // Generate the linked list of features to test.
+ // Generate the linked list of features to test.
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
- // Set next pointer.
+ // Set next pointer.
void** next = &features2.pNext;
- // Test all features we know about. If the feature is not available in core at our
- // current API version, and was not enabled by an extension, skip testing the feature.
- // We set the structure sType explicitly here as it is zeroed by the constructor.
+ // Test all features we know about. If the feature is not available in core at our
+ // current API version, and was not enabled by an extension, skip testing the feature.
+ // We set the structure sType explicitly here as it is zeroed by the constructor.
#define FEATURE(prefix, struct_name, macro_name, var_name) \
features.var_name.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES; \
- SetNext(next, features.var_name);
+ SetNext(next, features.var_name);
#define EXT_FEATURE(prefix, struct_name, macro_name, var_name) \
if (extensions.var_name) { \
- features.var_name.sType = \
+ features.var_name.sType = \
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_##macro_name##_FEATURES_##prefix; \
- SetNext(next, features.var_name); \
+ SetNext(next, features.var_name); \
}
FOR_EACH_VK_FEATURE_1_1(FEATURE);
@@ -1049,20 +1066,20 @@ bool Device::GetSuitability(bool requires_swapchain) {
#undef EXT_FEATURE
#undef FEATURE
- // Perform the feature test.
+ // Perform the feature test.
physical.GetFeatures2(features2);
features.features = features2.features;
- // Some features are mandatory. Check those.
+ // Some features are mandatory. Check those.
#define CHECK_FEATURE(feature, name) \
if (!features.feature.name) { \
- LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \
- suitable = false; \
+ LOG_ERROR(Render_Vulkan, "Missing required feature {}", #name); \
+ suitable = false; \
}
#define LOG_FEATURE(feature, name) \
if (!features.feature.name) { \
- LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \
+ LOG_INFO(Render_Vulkan, "Device doesn't support feature {}", #name); \
}
FOR_EACH_VK_RECOMMENDED_FEATURE(LOG_FEATURE);
@@ -1071,21 +1088,21 @@ bool Device::GetSuitability(bool requires_swapchain) {
#undef LOG_FEATURE
#undef CHECK_FEATURE
- // Generate linked list of properties.
+ // Generate linked list of properties.
properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
- // Set next pointer.
+ // Set next pointer.
next = &properties2.pNext;
- // Get driver info.
+ // Get driver info.
properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
SetNext(next, properties.driver);
- // Retrieve subgroup properties.
+ // Retrieve subgroup properties.
properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
SetNext(next, properties.subgroup_properties);
- // Retrieve relevant extension properties.
+ // Retrieve relevant extension properties.
if (extensions.shader_float_controls) {
properties.float_controls.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
@@ -1107,14 +1124,14 @@ bool Device::GetSuitability(bool requires_swapchain) {
SetNext(next, properties.transform_feedback);
}
- // Perform the property fetch.
+ // Perform the property fetch.
physical.GetProperties2(properties2);
properties.properties = properties2.properties;
- // Unload extensions if feature support is insufficient.
+ // Unload extensions if feature support is insufficient.
RemoveUnsuitableExtensions();
- // Check limits.
+ // Check limits.
struct Limit {
u32 minimum;
u32 value;
@@ -1123,11 +1140,11 @@ bool Device::GetSuitability(bool requires_swapchain) {
const VkPhysicalDeviceLimits& limits{properties.properties.limits};
const std::array limits_report{
- Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
- Limit{16, limits.maxViewports, "maxViewports"},
- Limit{8, limits.maxColorAttachments, "maxColorAttachments"},
- Limit{8, limits.maxClipDistances, "maxClipDistances"},
- };
+ Limit{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
+ Limit{16, limits.maxViewports, "maxViewports"},
+ Limit{8, limits.maxColorAttachments, "maxColorAttachments"},
+ Limit{8, limits.maxClipDistances, "maxClipDistances"},
+ };
for (const auto& [min, value, name] : limits_report) {
if (value < min) {
@@ -1136,7 +1153,7 @@ bool Device::GetSuitability(bool requires_swapchain) {
}
}
- // Return whether we were suitable.
+ // Return whether we were suitable.
return suitable;
}
@@ -1147,31 +1164,31 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.custom_border_color, features.custom_border_color,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
- // VK_EXT_depth_bias_control
+ // VK_EXT_depth_bias_control
extensions.depth_bias_control =
features.depth_bias_control.depthBiasControl &&
features.depth_bias_control.leastRepresentableValueForceUnormRepresentation;
RemoveExtensionFeatureIfUnsuitable(extensions.depth_bias_control, features.depth_bias_control,
VK_EXT_DEPTH_BIAS_CONTROL_EXTENSION_NAME);
- // VK_EXT_depth_clip_control
+ // VK_EXT_depth_clip_control
extensions.depth_clip_control = features.depth_clip_control.depthClipControl;
RemoveExtensionFeatureIfUnsuitable(extensions.depth_clip_control, features.depth_clip_control,
VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
- /* */ // VK_EXT_extended_dynamic_state
+ /* */ // VK_EXT_extended_dynamic_state
extensions.extended_dynamic_state = features.extended_dynamic_state.extendedDynamicState;
RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state,
features.extended_dynamic_state,
VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
- // VK_EXT_extended_dynamic_state2
+ // VK_EXT_extended_dynamic_state2
extensions.extended_dynamic_state2 = features.extended_dynamic_state2.extendedDynamicState2;
RemoveExtensionFeatureIfUnsuitable(extensions.extended_dynamic_state2,
features.extended_dynamic_state2,
VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
- // VK_EXT_extended_dynamic_state3
+ // VK_EXT_extended_dynamic_state3
dynamic_state3_blending =
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable &&
features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation &&
@@ -1187,27 +1204,27 @@ void Device::RemoveUnsuitableExtensions() {
features.extended_dynamic_state3,
VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
- // VK_EXT_provoking_vertex
+ // VK_EXT_provoking_vertex
extensions.provoking_vertex =
features.provoking_vertex.provokingVertexLast &&
features.provoking_vertex.transformFeedbackPreservesProvokingVertex;
RemoveExtensionFeatureIfUnsuitable(extensions.provoking_vertex, features.provoking_vertex,
VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
- // VK_KHR_shader_atomic_int64
+ // VK_KHR_shader_atomic_int64
extensions.shader_atomic_int64 = features.shader_atomic_int64.shaderBufferInt64Atomics &&
features.shader_atomic_int64.shaderSharedInt64Atomics;
RemoveExtensionFeatureIfUnsuitable(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
- // VK_EXT_shader_demote_to_helper_invocation
+ // VK_EXT_shader_demote_to_helper_invocation
extensions.shader_demote_to_helper_invocation =
features.shader_demote_to_helper_invocation.shaderDemoteToHelperInvocation;
RemoveExtensionFeatureIfUnsuitable(extensions.shader_demote_to_helper_invocation,
features.shader_demote_to_helper_invocation,
VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
- // VK_EXT_subgroup_size_control
+ // VK_EXT_subgroup_size_control
extensions.subgroup_size_control =
features.subgroup_size_control.subgroupSizeControl &&
properties.subgroup_size_control.minSubgroupSize <= GuestWarpSize &&
@@ -1216,7 +1233,7 @@ void Device::RemoveUnsuitableExtensions() {
features.subgroup_size_control,
VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
- // VK_EXT_transform_feedback
+ // VK_EXT_transform_feedback
extensions.transform_feedback =
features.transform_feedback.transformFeedback &&
features.transform_feedback.geometryStreams &&
@@ -1227,14 +1244,14 @@ void Device::RemoveUnsuitableExtensions() {
RemoveExtensionFeatureIfUnsuitable(extensions.transform_feedback, features.transform_feedback,
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
- // VK_EXT_vertex_input_dynamic_state
+ // VK_EXT_vertex_input_dynamic_state
extensions.vertex_input_dynamic_state =
features.vertex_input_dynamic_state.vertexInputDynamicState;
RemoveExtensionFeatureIfUnsuitable(extensions.vertex_input_dynamic_state,
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
- // VK_KHR_pipeline_executable_properties
+ // VK_KHR_pipeline_executable_properties
if (Settings::values.renderer_shader_feedback.GetValue()) {
extensions.pipeline_executable_properties =
features.pipeline_executable_properties.pipelineExecutableInfo;
@@ -1247,7 +1264,7 @@ void Device::RemoveUnsuitableExtensions() {
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME);
}
- // VK_KHR_workgroup_memory_explicit_layout
+ // VK_KHR_workgroup_memory_explicit_layout
extensions.workgroup_memory_explicit_layout =
features.features.shaderInt16 &&
features.workgroup_memory_explicit_layout.workgroupMemoryExplicitLayout &&
diff --git a/src/video_core/vulkan_common/vulkan_raii.h b/src/video_core/vulkan_common/vulkan_raii.h
new file mode 100644
index 0000000000..cf5e268b68
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_raii.h
@@ -0,0 +1,231 @@
+// SPDX-FileCopyrightText: Copyright 2025 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "common/logging/log.h"
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+/**
+ * Move-only owner of a single Vulkan handle of type T.
+ * On destruction, reset or move-assignment, a non-null handle is passed to the stored deleter.
+ */
+template <typename T, typename Dispatch = vk::InstanceDispatch>
+class VulkanRaii {
+public:
+    using DeleterFunc = std::function<void(T, const Dispatch&)>;
+
+    // Default constructor - null handle, empty deleter, value-initialized dispatch table
+    VulkanRaii() : handle{}, deleter{}, dispatch{} {}
+
+    // Takes ownership of handle_; resource_name is only used for the debug log line
+    VulkanRaii(T handle_, DeleterFunc deleter_, const Dispatch& dispatch_, const char* resource_name = "Vulkan resource")
+        : handle{handle_}, deleter{std::move(deleter_)}, dispatch{dispatch_} {
+        LOG_DEBUG(Render_Vulkan, "RAII wrapper created for {}", resource_name);
+    }
+
+    // Move constructor - takes the handle and leaves `other` holding VK_NULL_HANDLE
+    VulkanRaii(VulkanRaii&& other) noexcept
+        : handle{std::exchange(other.handle, VK_NULL_HANDLE)},
+          deleter{std::move(other.deleter)},
+          dispatch{other.dispatch} {
+    }
+
+    // Move assignment - destroys the currently owned handle first, then takes over `other`
+    VulkanRaii& operator=(VulkanRaii&& other) noexcept {
+        if (this != &other) {
+            cleanup();
+            handle = std::exchange(other.handle, VK_NULL_HANDLE);
+            deleter = std::move(other.deleter);
+            dispatch = other.dispatch;
+        }
+        return *this;
+    }
+
+    // Destructor - automatically cleans up the resource
+    ~VulkanRaii() { cleanup(); }
+
+    // Disallow copying
+    VulkanRaii(const VulkanRaii&) = delete;
+    VulkanRaii& operator=(const VulkanRaii&) = delete;
+
+    // Get the underlying handle (ownership stays with the wrapper)
+    T get() const noexcept {
+        return handle;
+    }
+
+    // Check if the handle is valid, i.e. not VK_NULL_HANDLE
+    bool valid() const noexcept {
+        return handle != VK_NULL_HANDLE;
+    }
+
+    // Release ownership of the handle without destroying it; the caller must destroy it
+    T release() noexcept {
+        return std::exchange(handle, VK_NULL_HANDLE);
+    }
+
+    // Destroy the current handle (if any) and adopt new_handle; the existing deleter is kept unless new_deleter is non-empty
+    void reset(T new_handle = VK_NULL_HANDLE, DeleterFunc new_deleter = {}) {
+        cleanup();
+        handle = new_handle;
+        if (new_deleter) {
+            deleter = std::move(new_deleter);
+        }
+    }
+
+    // Implicit conversion to handle type
+    operator T() const noexcept {
+        return handle;
+    }
+
+    // Dereference operator for pointer-like access
+    T operator->() const noexcept {
+        return handle;
+    }
+
+private:
+    // Invokes the deleter on a non-null handle and nulls it; does nothing when no deleter is set
+    void cleanup() noexcept {
+        if (handle != VK_NULL_HANDLE && deleter) {
+            deleter(handle, dispatch);
+            handle = VK_NULL_HANDLE;
+        }
+    }
+
+    T handle;
+    DeleterFunc deleter;
+    Dispatch dispatch; // NOTE(review): stored by value, so every wrapper copies the dispatch table - confirm this is intended
+};
+
+// Common type aliases for Vulkan RAII wrappers: instance-level objects use vk::InstanceDispatch, device-level objects use vk::DeviceDispatch
+using ManagedInstance = VulkanRaii<VkInstance, vk::InstanceDispatch>;
+using ManagedDevice = VulkanRaii<VkDevice, vk::DeviceDispatch>;
+using ManagedSurface = VulkanRaii<VkSurfaceKHR, vk::InstanceDispatch>;
+using ManagedSwapchain = VulkanRaii<VkSwapchainKHR, vk::DeviceDispatch>;
+using ManagedCommandPool = VulkanRaii<VkCommandPool, vk::DeviceDispatch>;
+using ManagedBuffer = VulkanRaii<VkBuffer, vk::DeviceDispatch>;
+using ManagedImage = VulkanRaii<VkImage, vk::DeviceDispatch>;
+using ManagedImageView = VulkanRaii<VkImageView, vk::DeviceDispatch>;
+using ManagedSampler = VulkanRaii<VkSampler, vk::DeviceDispatch>;
+using ManagedShaderModule = VulkanRaii<VkShaderModule, vk::DeviceDispatch>;
+using ManagedPipeline = VulkanRaii<VkPipeline, vk::DeviceDispatch>;
+using ManagedPipelineLayout = VulkanRaii<VkPipelineLayout, vk::DeviceDispatch>;
+using ManagedDescriptorSetLayout = VulkanRaii<VkDescriptorSetLayout, vk::DeviceDispatch>;
+using ManagedDescriptorPool = VulkanRaii<VkDescriptorPool, vk::DeviceDispatch>;
+using ManagedSemaphore = VulkanRaii<VkSemaphore, vk::DeviceDispatch>;
+using ManagedFence = VulkanRaii<VkFence, vk::DeviceDispatch>;
+using ManagedDebugUtilsMessenger = VulkanRaii<VkDebugUtilsMessengerEXT, vk::InstanceDispatch>;
+
+// Helper functions to create RAII wrappers
+
+/** Builds a ManagedInstance around the raw handle of `instance`; cleanup calls vkDestroyInstance.
+ * NOTE(review): only the raw handle is copied out of `instance`; if vk::Instance also destroys it, it is destroyed twice - confirm ownership.
+ */
+inline ManagedInstance MakeManagedInstance(const vk::Instance& instance, const vk::InstanceDispatch& dispatch) {
+    return ManagedInstance(
+        *instance,
+        [](VkInstance raw, const vk::InstanceDispatch& table) { table.vkDestroyInstance(raw, nullptr); },
+        dispatch, "VkInstance");
+}
+
+/** Builds a ManagedDevice around the raw handle of `device`; cleanup calls vkDestroyDevice.
+ * NOTE(review): only the raw handle is copied out of `device`; if vk::Device also destroys it, it is destroyed twice - confirm ownership.
+ */
+inline ManagedDevice MakeManagedDevice(const vk::Device& device, const vk::DeviceDispatch& dispatch) {
+    return ManagedDevice(
+        *device,
+        [](VkDevice raw, const vk::DeviceDispatch& table) { table.vkDestroyDevice(raw, nullptr); },
+        dispatch, "VkDevice");
+}
+
+/** Builds a ManagedSurface around the raw handle of `surface`; cleanup calls vkDestroySurfaceKHR with the captured raw instance handle.
+ * NOTE(review): the instance handle is captured by value, so the instance must outlive the returned wrapper - confirm at call sites.
+ */
+inline ManagedSurface MakeManagedSurface(const vk::SurfaceKHR& surface, const vk::Instance& instance, const vk::InstanceDispatch& dispatch) {
+    return ManagedSurface(
+        *surface,
+        [owner = *instance](VkSurfaceKHR raw, const vk::InstanceDispatch& table) { table.vkDestroySurfaceKHR(owner, raw, nullptr); },
+        dispatch, "VkSurfaceKHR");
+}
+
+/** Builds a ManagedDebugUtilsMessenger around the raw handle of `messenger`; cleanup calls vkDestroyDebugUtilsMessengerEXT.
+ * NOTE(review): the instance handle is captured by value, so the instance must outlive the returned wrapper - confirm at call sites.
+ */
+inline ManagedDebugUtilsMessenger MakeManagedDebugUtilsMessenger(const vk::DebugUtilsMessenger& messenger,
+                                                                 const vk::Instance& instance,
+                                                                 const vk::InstanceDispatch& dispatch) {
+    return ManagedDebugUtilsMessenger(
+        *messenger,
+        [owner = *instance](VkDebugUtilsMessengerEXT raw, const vk::InstanceDispatch& table) { table.vkDestroyDebugUtilsMessengerEXT(owner, raw, nullptr); },
+        dispatch, "VkDebugUtilsMessengerEXT");
+}
+
+/**
+ * Takes ownership of swapchain_handle; cleanup calls vkDestroySwapchainKHR on the captured device_handle.
+ */
+inline ManagedSwapchain MakeManagedSwapchain(VkSwapchainKHR swapchain_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedSwapchain(
+        swapchain_handle,
+        [device_handle](VkSwapchainKHR raw, const vk::DeviceDispatch& table) { table.vkDestroySwapchainKHR(device_handle, raw, nullptr); },
+        dispatch, "VkSwapchainKHR");
+}
+
+/**
+ * Takes ownership of buffer_handle; cleanup calls vkDestroyBuffer on the captured device_handle.
+ */
+inline ManagedBuffer MakeManagedBuffer(VkBuffer buffer_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedBuffer(
+        buffer_handle,
+        [device_handle](VkBuffer raw, const vk::DeviceDispatch& table) { table.vkDestroyBuffer(device_handle, raw, nullptr); },
+        dispatch, "VkBuffer");
+}
+
+/**
+ * Takes ownership of image_handle; cleanup calls vkDestroyImage on the captured device_handle.
+ */
+inline ManagedImage MakeManagedImage(VkImage image_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedImage(
+        image_handle,
+        [device_handle](VkImage raw, const vk::DeviceDispatch& table) { table.vkDestroyImage(device_handle, raw, nullptr); },
+        dispatch, "VkImage");
+}
+
+/**
+ * Takes ownership of view_handle; cleanup calls vkDestroyImageView on the captured device_handle.
+ */
+inline ManagedImageView MakeManagedImageView(VkImageView view_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedImageView(
+        view_handle,
+        [device_handle](VkImageView raw, const vk::DeviceDispatch& table) { table.vkDestroyImageView(device_handle, raw, nullptr); },
+        dispatch, "VkImageView");
+}
+
+/**
+ * Takes ownership of semaphore_handle; cleanup calls vkDestroySemaphore on the captured device_handle.
+ */
+inline ManagedSemaphore MakeManagedSemaphore(VkSemaphore semaphore_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedSemaphore(
+        semaphore_handle,
+        [device_handle](VkSemaphore raw, const vk::DeviceDispatch& table) { table.vkDestroySemaphore(device_handle, raw, nullptr); },
+        dispatch, "VkSemaphore");
+}
+
+/**
+ * Takes ownership of fence_handle; cleanup calls vkDestroyFence on the captured device_handle.
+ */
+inline ManagedFence MakeManagedFence(VkFence fence_handle, VkDevice device_handle, const vk::DeviceDispatch& dispatch) {
+    return ManagedFence(
+        fence_handle,
+        [device_handle](VkFence raw, const vk::DeviceDispatch& table) { table.vkDestroyFence(device_handle, raw, nullptr); },
+        dispatch, "VkFence");
+}
+
+} // namespace Vulkan
\ No newline at end of file