From d777ec15f822e206279b6420bbb7becb7563069b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Thu, 5 Jan 2023 06:43:54 -0500
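Subject: [PATCH] BufferBase: Don't ignore GPU pages.

BufferBase no longer masks out untracked words when querying or
iterating GPU-modified pages: the range iteration helper,
IsRegionModified and ModifiedRegion now use the state word as-is.
The untracked_words locals are kept but marked [[maybe_unused]], and
the "Cached write downloads" test now expects one visited range
instead of zero.

Also included in this patch:
- InvalidationAccumulator: parenthesize the alignment expression so
  the mask is applied before the address is subtracted (binary '-'
  binds tighter than '&', so the old expression masked with
  'atomicy_mask - address'), rename the atomicy_* constants to
  atomicity_*, and include <utility>.
- MemoryManager: only use the fastmem arena when fastmem is enabled
  in the settings, in addition to the GPU accuracy check.
- InnerInvalidation loops bind the (address, size) pairs by const
  reference instead of by value.
- Re-align continuation lines in maxwell_dma.cpp and memory_manager.h.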
---
 src/tests/video_core/buffer_base.cpp             |  2 +-
 src/video_core/buffer_cache/buffer_base.h        | 14 ++++++--------
 src/video_core/engines/maxwell_dma.cpp           |  4 ++--
 src/video_core/invalidation_accumulator.h        | 13 +++++++------
 src/video_core/memory_manager.cpp                |  2 +-
 src/video_core/memory_manager.h                  |  2 +-
 src/video_core/rasterizer_interface.h            |  2 +-
 src/video_core/renderer_vulkan/vk_rasterizer.cpp |  6 +++---
 8 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index f7236afabf..5cd0628f22 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
     int num = 0;
     buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
     buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 0);
+    REQUIRE(num == 1);
     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
     REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
     buffer.FlushCachedWrites();
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 92d77eef2d..c47b7d8666 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
         if (query_begin >= SizeBytes() || size < 0) {
             return;
         }
-        u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
         u64* const state_words = Array<type>();
         const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
         u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
                 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
             }
-            // Exclude CPU modified pages when visiting GPU pages
-            const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+            // Untracked (CPU modified) pages are no longer excluded when visiting GPU pages
+            const u64 word = current_word;
             u64 page = page_begin;
             page_begin = 0;
 
@@ -531,7 +531,7 @@ private:
     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
 
-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
         const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
             if (word == 0) {
                 continue;
             }
@@ -564,7 +563,7 @@ private:
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
 
-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
         const u64* const state_words = Array<type>();
         const u64 num_query_words = size / BYTES_PER_WORD + 1;
         const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
             if (word == 0) {
                 continue;
             }
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 7bf08e3e0d..7762c7d96f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -123,7 +123,7 @@ void MaxwellDMA::Launch() {
                         convert_linear_2_blocklinear_addr(regs.offset_in + offset),
                         tmp_buffer.data(), tmp_buffer.size());
                     memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
-                                              tmp_buffer.size());
+                                                    tmp_buffer.size());
                 }
             } else if (is_src_pitch && !is_dst_pitch) {
                 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -143,7 +143,7 @@ void MaxwellDMA::Launch() {
                     memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
                                                    regs.line_length_in);
                     memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
-                                              regs.line_length_in);
+                                                    regs.line_length_in);
                 }
             }
         }
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h
index 42420e31c8..2c2aaf7bb0 100644
--- a/src/video_core/invalidation_accumulator.h
+++ b/src/video_core/invalidation_accumulator.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <utility>
 #include <vector>
 
 #include "common/common_types.h"
@@ -26,8 +27,8 @@ public:
         if (address >= start_address && address + size <= last_collection) [[likely]] {
             return;
         }
-        size = (address + size + atomicy_side_mask) & atomicy_mask - address;
-        address = address & atomicy_mask;
+        size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
+        address = address & atomicity_mask;
         if (!has_collected) [[unlikely]] {
             reset_values();
             has_collected = true;
@@ -64,10 +65,10 @@ public:
     }
 
 private:
-    static constexpr size_t atomicy_bits = 5;
-    static constexpr size_t atomicy_size = 1ULL << atomicy_bits;
-    static constexpr size_t atomicy_side_mask = atomicy_size - 1;
-    static constexpr size_t atomicy_mask = ~atomicy_side_mask;
+    static constexpr size_t atomicity_bits = 5;
+    static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
+    static constexpr size_t atomicity_size_mask = atomicity_size - 1;
+    static constexpr size_t atomicity_mask = ~atomicity_size_mask;
     GPUVAddr start_address{};
     GPUVAddr last_collection{};
     size_t accumulated_size{};
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 0a63900543..3bcae3503b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     big_page_table_cpu.resize(big_page_table_size);
     big_page_continous.resize(big_page_table_size / continous_bits, 0);
     entries.resize(page_table_size / 32, 0);
-    if (!Settings::IsGPULevelExtreme()) {
+    if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
         fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
     } else {
         fastmem_arena = nullptr;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index ff9e3c0b3a..2936364f09 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -107,7 +107,7 @@ public:
      * will be returned;
      */
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
-                                                                 std::size_t size) const;
+                                                                    std::size_t size) const;
 
     GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
                  PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6b66ad7b60..1735b61645 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -97,7 +97,7 @@ public:
                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
 
     virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
-        for (const auto [cpu_addr, size] : sequences) {
+        for (const auto& [cpu_addr, size] : sequences) {
             InvalidateRegion(cpu_addr, size);
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 6c4d745649..ed4a721668 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -486,18 +486,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
 void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
             texture_cache.WriteMemory(addr, size);
         }
     }
     {
         std::scoped_lock lock{buffer_cache.mutex};
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
             buffer_cache.WriteMemory(addr, size);
         }
     }
     {
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
             query_cache.InvalidateRegion(addr, size);
             pipeline_cache.InvalidateRegion(addr, size);
         }