From 0da6a7e2348843027019934ae208753324532fa1 Mon Sep 17 00:00:00 2001
From: Tony Wasserka <NeoBrainX@gmail.com>
Date: Thu, 1 Jan 2015 19:58:18 +0100
Subject: [PATCH] GPU: Properly implement memory fills.

---
 src/core/hle/service/gsp_gpu.cpp | 32 +++++++++++++-------------
 src/core/hle/service/gsp_gpu.h   |  4 ++++
 src/core/hw/gpu.cpp              | 39 ++++++++++++++++++++++----------
 src/core/hw/gpu.h                | 32 +++++++++++++++++++++++---
 4 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 31e61391ff..c23cfa3c8d 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -368,28 +368,28 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
     case CommandId::SET_MEMORY_FILL:
     {
         auto& params = command.memory_fill;
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)), 
-                Memory::VirtualToPhysicalAddress(params.start1) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)), 
-                Memory::VirtualToPhysicalAddress(params.end1) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].size)), params.end1 - params.start1);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value)), params.value1);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_start)),
+                         Memory::VirtualToPhysicalAddress(params.start1) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].address_end)),
+                         Memory::VirtualToPhysicalAddress(params.end1) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].value_32bit)), params.value1);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[0].control)), params.control1);
 
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)), 
-                Memory::VirtualToPhysicalAddress(params.start2) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)), 
-                Memory::VirtualToPhysicalAddress(params.end2) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].size)), params.end2 - params.start2);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value)), params.value2);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_start)),
+                         Memory::VirtualToPhysicalAddress(params.start2) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].address_end)),
+                         Memory::VirtualToPhysicalAddress(params.end2) >> 3);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].value_32bit)), params.value2);
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(memory_fill_config[1].control)), params.control2);
         break;
     }
 
     case CommandId::SET_DISPLAY_TRANSFER:
     {
         auto& params = command.image_copy;
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
                 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size);
         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size);
@@ -402,9 +402,9 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
     case CommandId::SET_TEXTURE_COPY:
     {
         auto& params = command.image_copy;
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
                 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
-        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 
+        WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
                 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size);
         WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size);
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index 65abb194ab..a435d418aa 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -109,9 +109,13 @@ struct Command {
             u32 start1;
             u32 value1;
             u32 end1;
+
             u32 start2;
             u32 value2;
             u32 end2;
+
+            u16 control1;
+            u16 control2;
         } memory_fill;
 
         struct {
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index aad0e5d0d9..bd7d92cd1f 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -67,23 +67,38 @@ inline void Write(u32 addr, const T data) {
     switch (index) {
 
-    // Memory fills are triggered once the fill value is written.
+    // Memory fills are triggered by writing the control register with its trigger bit set.
-    // NOTE: This is not verified.
-    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3):
-    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3):
     {
-        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value));
-        const auto& config = g_regs.memory_fill_config[is_second_filler];
+        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
+        auto& config = g_regs.memory_fill_config[is_second_filler];
 
-        // TODO: Not sure if this check should be done at GSP level instead
-        if (config.address_start) {
-            // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all
-            u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress()));
-            u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress()));
-            for (u32* ptr = start; ptr < end; ++ptr)
-                *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation
+        if (config.address_start && config.trigger) {
+            u8* start = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress()));
+            u8* end = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress()));
+
+            if (config.fill_24bit) {
+                // fill with 24-bit values
+                for (u8* ptr = start; ptr < end; ptr += 3) {
+                    ptr[0] = config.value_24bit_b;
+                    ptr[1] = config.value_24bit_g;
+                    ptr[2] = config.value_24bit_r;
+                }
+            } else if (config.fill_32bit) {
+                // fill with 32-bit values
+                for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
+                    *ptr = config.value_32bit;
+            } else {
+                // fill with 16-bit values
+                for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
+                    *ptr = config.value_16bit;
+            }
 
             LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
 
+            config.trigger = 0;
+            config.finished = 1;
+
             if (!is_second_filler) {
                 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
             } else {
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 9fd694f652..df9aa0d71d 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -84,9 +84,35 @@ struct Regs {
 
     struct {
         u32 address_start;
-        u32 address_end; // ?
-        u32 size;
-        u32 value; // ?
+        u32 address_end;
+
+        union {
+            u32 value_32bit;
+            // Bits 0-15 of the same register word; written by 16-bit wide fills.
+            BitField<0, 16, u32> value_16bit;
+            // Per-byte views of the same word, written by 24-bit wide fills.
+            // TODO: Verify component order
+            BitField< 0, 8, u32> value_24bit_r;
+            BitField< 8, 8, u32> value_24bit_g;
+            BitField<16, 8, u32> value_24bit_b;
+        };
+
+        union {
+            u32 control;
+
+            // Bit 0: setting this field to 1 triggers the memory fill.
+            // This field also acts as a busy flag, and gets reset to 0 upon completion.
+            BitField<0, 1, u32> trigger;
+
+            // Bit 1: set to 1 upon completion. Must not alias bit 0, or setting it re-arms trigger.
+            BitField<1, 1, u32> finished;
+
+            // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
+            BitField<8, 1, u32> fill_24bit;
+
+            // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
+            BitField<9, 1, u32> fill_32bit;
+        };
 
         inline u32 GetStartAddress() const {
             return DecodeAddressRegister(address_start);