Compare commits

..

6 commits (comparing master...msaa)

Author SHA1 Message Date
25af81fee5 Still on swizzle 2025-05-15 01:45:45 -03:00
0df8b5cda5 I hate swizzle 2025-05-14 05:16:05 -03:00
a894c4e7cf swizzle try fixes 2025-05-14 05:07:11 -03:00
7488437bdc use direct upload and download for small images 2025-05-14 04:10:48 -03:00
1ffa98a40d some MSAA fixes 2025-05-13 06:03:10 -03:00
38d18af8ba Initial MSAA fix 2025-05-13 06:03:10 -03:00
2 changed files with 441 additions and 86 deletions

View file

@ -103,6 +103,8 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
switch (VideoCore::Surface::GetFormatType(format)) { switch (VideoCore::Surface::GetFormatType(format)) {
case VideoCore::Surface::SurfaceType::ColorTexture: case VideoCore::Surface::SurfaceType::ColorTexture:
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
// Always add storage usage for color textures to handle storage descriptor validation
usage |= VK_IMAGE_USAGE_STORAGE_BIT;
break; break;
case VideoCore::Surface::SurfaceType::Depth: case VideoCore::Surface::SurfaceType::Depth:
case VideoCore::Surface::SurfaceType::Stencil: case VideoCore::Surface::SurfaceType::Stencil:
@ -247,7 +249,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
} }
} }
[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { [[nodiscard]] [[maybe_unused]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
switch (swizzle) { switch (swizzle) {
case SwizzleSource::Zero: case SwizzleSource::Zero:
return VK_COMPONENT_SWIZZLE_ZERO; return VK_COMPONENT_SWIZZLE_ZERO;
@ -1529,6 +1531,24 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
if (is_rescaled) { if (is_rescaled) {
ScaleDown(true); ScaleDown(true);
} }
// Handle MSAA upload if necessary
if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
// Only use MSAA copy pass for color formats
// Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
// Fast path for small uploads (under 512x512)
// For small uploads, we can just upload directly to the MSAA image
bool use_fast_path = true;
for (const auto& copy : copies) {
if (copy.image_extent.width > 512 || copy.image_extent.height > 512) {
use_fast_path = false;
break;
}
}
if (use_fast_path) {
// Direct upload to MSAA image
scheduler->RequestOutsideRenderPassOperationContext(); scheduler->RequestOutsideRenderPassOperationContext();
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer; const VkBuffer src_buffer = buffer;
@ -1539,6 +1559,66 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
vk_copies](vk::CommandBuffer cmdbuf) { vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
}); });
return;
}
// Create a temporary non-MSAA image to upload the data first
ImageInfo temp_info = info;
temp_info.num_samples = 1;
// Create image with same usage flags as the target image to avoid validation errors
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
// Upload to the temporary non-MSAA image
scheduler->RequestOutsideRenderPassOperationContext();
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage temp_vk_image = *temp_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
});
// Now use MSAACopyPass to convert from non-MSAA to MSAA
std::vector<VideoCommon::ImageCopy> image_copies;
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
}
// Create a wrapper Image for the temporary image
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
// Use MSAACopyPass to convert from non-MSAA to MSAA
runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false);
std::exchange(initialized, true);
return;
}
// For depth/stencil formats, fall back to regular upload
} else {
// Regular non-MSAA upload
scheduler->RequestOutsideRenderPassOperationContext();
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage vk_image = *original_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
const bool is_initialized = std::exchange(initialized, true);
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
});
}
if (is_rescaled) { if (is_rescaled) {
ScaleUp(); ScaleUp();
} }
@ -1565,6 +1645,196 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
if (is_rescaled) { if (is_rescaled) {
ScaleDown(); ScaleDown();
} }
// Handle MSAA download if necessary
if (info.num_samples > 1 && runtime->msaa_copy_pass) {
// Only use MSAA copy pass for color formats
// Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
// Fast path for small downloads (under 512x512)
// For small downloads, we can read directly from the MSAA image
bool use_fast_path = true;
for (const auto& copy : copies) {
if (copy.image_extent.width > 512 || copy.image_extent.height > 512) {
use_fast_path = false;
break;
}
}
if (use_fast_path) {
// Direct download from MSAA image
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> vk_copies;
// Pre-allocate to minimize reallocations
buffers_vector.reserve(buffers_span.size());
vk_copies.reserve(buffers_span.size());
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffers = std::move(buffers_vector), image = *original_image,
aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
return;
}
// Create a temporary non-MSAA image to download the data
ImageInfo temp_info = info;
temp_info.num_samples = 1;
// Create image with same usage flags as the target image to avoid validation errors
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
// Create a wrapper Image for the temporary image
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
// Convert from MSAA to non-MSAA using MSAACopyPass
std::vector<VideoCommon::ImageCopy> image_copies;
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = copy.image_offset;
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
}
// Use MSAACopyPass to convert from MSAA to non-MSAA
runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true);
// Now download from the non-MSAA image
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image,
aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
return;
}
// For depth/stencil formats, fall back to regular download
} else {
// Regular non-MSAA download
boost::container::small_vector<VkBuffer, 8> buffers_vector{}; boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies; vk_copies;
@ -1629,6 +1899,8 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier); 0, memory_write_barrier, nullptr, image_write_barrier);
}); });
}
if (is_rescaled) { if (is_rescaled) {
ScaleUp(true); ScaleUp(true);
} }
@ -1827,6 +2099,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.pNext = nullptr, .pNext = nullptr,
.usage = image.UsageFlags(), .usage = image.UsageFlags(),
}; };
// Check if the image might be used as a storage image or input attachment
const bool is_storage = (image.UsageFlags() & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) != 0;
// For storage images and input attachments, we must use identity swizzles to avoid Vulkan validation errors
// (VUID-VkWriteDescriptorSet-descriptorType-00336)
// For normal texture sampling, we can use the custom swizzles
const VkImageViewCreateInfo create_info{ const VkImageViewCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = &image_view_usage, .pNext = &image_view_usage,
@ -1834,7 +2112,14 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.image = image.Handle(), .image = image.Handle(),
.viewType = VkImageViewType{}, .viewType = VkImageViewType{},
.format = format_info.format, .format = format_info.format,
.components{ .components = is_storage ?
VkComponentMapping{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
} :
VkComponentMapping{
.r = ComponentSwizzle(swizzle[0]), .r = ComponentSwizzle(swizzle[0]),
.g = ComponentSwizzle(swizzle[1]), .g = ComponentSwizzle(swizzle[1]),
.b = ComponentSwizzle(swizzle[2]), .b = ComponentSwizzle(swizzle[2]),
@ -1921,7 +2206,23 @@ VkImageView ImageView::DepthView() {
return *depth_view; return *depth_view;
} }
const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); // Always create depth views with identity swizzles to avoid Vulkan validation errors
// when used as storage images or input attachments
depth_view = device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
.format = info.format,
.components = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = MakeSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT, range),
});
return *depth_view; return *depth_view;
} }
@ -1933,7 +2234,23 @@ VkImageView ImageView::StencilView() {
return *stencil_view; return *stencil_view;
} }
const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); // Always create stencil views with identity swizzles to avoid Vulkan validation errors
// when used as storage images or input attachments
stencil_view = device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
.format = info.format,
.components = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = MakeSubresourceRange(VK_IMAGE_ASPECT_STENCIL_BIT, range),
});
return *stencil_view; return *stencil_view;
} }
@ -1944,7 +2261,23 @@ VkImageView ImageView::ColorView() {
if (color_view) { if (color_view) {
return *color_view; return *color_view;
} }
color_view = MakeView(VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT); // Always create color views with identity swizzles to avoid Vulkan validation errors
// when used as storage images or input attachments
color_view = device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(type),
.format = VK_FORMAT_R8G8B8A8_UNORM,
.components = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = MakeSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, range),
});
return *color_view; return *color_view;
} }
@ -1966,7 +2299,30 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type,
if (view) { if (view) {
return *view; return *view;
} }
view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
// Storage images must have identity swizzle components according to the Vulkan spec
// Create a view with identity swizzles for all components regardless of the format
const VkFormat vk_format = Format(image_format);
// Determine the appropriate aspect mask based on the format
// Use the existing ImageAspectMask function which properly handles format detection
VkImageAspectFlags aspect_mask = ImageAspectMask(format);
view = device->GetLogical().CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = image_handle,
.viewType = ImageViewType(texture_type),
.format = vk_format,
.components = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = MakeSubresourceRange(aspect_mask, range),
});
return *view; return *view;
} }

View file

@ -82,8 +82,7 @@ public:
} }
bool CanUploadMSAA() const noexcept { bool CanUploadMSAA() const noexcept {
// TODO: Implement buffer to MSAA uploads return msaa_copy_pass.operator bool();
return false;
} }
void AccelerateImageUpload(Image&, const StagingBufferRef&, void AccelerateImageUpload(Image&, const StagingBufferRef&,