Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU

This commit is contained in:
Fernando Sahmkow 2023-06-28 19:32:50 +02:00
parent 47d0d292d5
commit da440da9f5
19 changed files with 153 additions and 38 deletions

View file

@ -3,6 +3,7 @@
#include <algorithm>
#include <cstring>
#include <span>
#include "common/assert.h"
#include "common/atomic_ops.h"
@ -679,7 +680,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
GetInteger(vaddr), static_cast<u64>(data));
},
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
std::memcpy(ptr, &data, sizeof(T));
}
@ -693,7 +694,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
},
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@ -708,7 +709,7 @@ struct Memory::Impl {
LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
},
[&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
[&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
if (ptr) {
const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@ -718,7 +719,7 @@ struct Memory::Impl {
void HandleRasterizerDownload(VAddr address, size_t size) {
const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_areas[core];
auto& current_area = rasterizer_read_areas[core];
const VAddr end_address = address + size;
if (current_area.start_address <= address && end_address <= current_area.end_address)
[[likely]] {
@ -727,9 +728,31 @@ struct Memory::Impl {
current_area = system.GPU().OnCPURead(address, size);
}
Common::PageTable* current_page_table = nullptr;
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{};
void HandleRasterizerWrite(VAddr address, size_t size) {
const size_t core = system.GetCurrentHostThreadID();
auto& current_area = rasterizer_write_areas[core];
VAddr subaddress = address >> YUZU_PAGEBITS;
bool do_collection = current_area.last_address == subaddress;
if (!do_collection) [[unlikely]] {
do_collection = system.GPU().OnCPUWrite(address, size);
if (!do_collection) {
return;
}
current_area.last_address = subaddress;
}
gpu_dirty_managers[core].Collect(address, size);
}
struct GPUDirtyState {
VAddr last_address;
};
Core::System& system;
Common::PageTable* current_page_table = nullptr;
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
rasterizer_read_areas{};
std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
};
Memory::Memory(Core::System& system_) : system{system_} {
@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
}
void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
impl->gpu_dirty_managers = managers;
}
Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
}