GPU: Simultaneous compute pass read-write (#10965)

This commit is contained in:
Evan Hemsley 2024-09-27 12:49:37 -07:00 committed by GitHub
parent d39acc6d1e
commit 05d0656bd6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 241 additions and 186 deletions

View file

@ -464,7 +464,12 @@ typedef enum SDL_GPUTextureFormat
* A texture must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
* \since This enum is available since SDL 3.0.0
* With regard to compute storage usage, READ | WRITE means that you can have shader A that only writes into the texture and shader B that only reads from the texture, and that you can bind the same texture to either shader.
* SIMULTANEOUS means that you can do reads and writes within the same shader or compute pass. It also implies that atomic ops can be used, since those are read-modify-write operations.
* If you use SIMULTANEOUS, you are responsible for avoiding data races, as there is no data synchronization within a compute pass.
* Note that SIMULTANEOUS usage is only supported by a limited number of texture formats.
*
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUTexture
*/
@ -476,6 +481,7 @@ typedef Uint32 SDL_GPUTextureUsageFlags;
#define SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ (1u << 3) /**< Texture supports storage reads in graphics stages. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ (1u << 4) /**< Texture supports storage reads in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE (1u << 5) /**< Texture supports storage writes in the compute stage. */
#define SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE (1u << 6) /**< Texture supports reads and writes in the same compute shader. This is NOT equivalent to READ | WRITE. */
/**
* Specifies the type of a texture.
@ -536,7 +542,10 @@ typedef enum SDL_GPUCubeMapFace
* A buffer must have at least one usage flag. Note that some usage flag
* combinations are invalid.
*
* \since This enum is available since SDL 3.0.0
* Unlike textures, buffers created with READ | WRITE can be used for simultaneous read-write usage.
* The same data synchronization concerns that apply to textures also apply to buffers.
*
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUBuffer
*/
@ -811,7 +820,7 @@ typedef enum SDL_GPUBlendFactor
/**
* Specifies which color components are written in a graphics pipeline.
*
* \since This enum is available since SDL 3.0.0
* \since This datatype is available since SDL 3.0.0
*
* \sa SDL_CreateGPUGraphicsPipeline
*/
@ -1489,8 +1498,8 @@ typedef struct SDL_GPUComputePipelineCreateInfo
Uint32 num_samplers; /**< The number of samplers defined in the shader. */
Uint32 num_readonly_storage_textures; /**< The number of readonly storage textures defined in the shader. */
Uint32 num_readonly_storage_buffers; /**< The number of readonly storage buffers defined in the shader. */
Uint32 num_writeonly_storage_textures; /**< The number of writeonly storage textures defined in the shader. */
Uint32 num_writeonly_storage_buffers; /**< The number of writeonly storage buffers defined in the shader. */
Uint32 num_readwrite_storage_textures; /**< The number of read-write storage textures defined in the shader. */
Uint32 num_readwrite_storage_buffers; /**< The number of read-write storage buffers defined in the shader. */
Uint32 num_uniform_buffers; /**< The number of uniform buffers defined in the shader. */
Uint32 threadcount_x; /**< The number of threads in the X dimension. This should match the value in the shader. */
Uint32 threadcount_y; /**< The number of threads in the Y dimension. This should match the value in the shader. */
@ -1667,14 +1676,14 @@ typedef struct SDL_GPUTextureSamplerBinding
*
* \sa SDL_BeginGPUComputePass
*/
typedef struct SDL_GPUStorageBufferWriteOnlyBinding
typedef struct SDL_GPUStorageBufferReadWriteBinding
{
SDL_GPUBuffer *buffer; /**< The buffer to bind. Must have been created with SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE. */
bool cycle; /**< true cycles the buffer if it is already bound. */
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
} SDL_GPUStorageBufferWriteOnlyBinding;
} SDL_GPUStorageBufferReadWriteBinding;
/**
* A structure specifying parameters related to binding textures in a compute
@ -1684,16 +1693,16 @@ typedef struct SDL_GPUStorageBufferWriteOnlyBinding
*
* \sa SDL_BeginGPUComputePass
*/
typedef struct SDL_GPUStorageTextureWriteOnlyBinding
typedef struct SDL_GPUStorageTextureReadWriteBinding
{
SDL_GPUTexture *texture; /**< The texture to bind. Must have been created with SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE. */
SDL_GPUTexture *texture; /**< The texture to bind. Must have been created with SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE or SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE. */
Uint32 mip_level; /**< The mip level index to bind. */
Uint32 layer; /**< The layer index to bind. */
bool cycle; /**< true cycles the texture if it is already bound. */
Uint8 padding1;
Uint8 padding2;
Uint8 padding3;
} SDL_GPUStorageTextureWriteOnlyBinding;
} SDL_GPUStorageTextureReadWriteBinding;
/* Functions */
@ -2807,17 +2816,22 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
* Begins a compute pass on a command buffer.
*
* A compute pass is defined by a set of texture subresources and buffers that
* will be written to by compute pipelines. These textures and buffers must
* have been created with the COMPUTE_STORAGE_WRITE bit. All operations
* may be written to by compute pipelines. These textures and buffers must
* have been created with the COMPUTE_STORAGE_WRITE bit or, for textures only, the COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE bit.
* If you do not create a texture with COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE, you must not read from the texture in the compute pass.
* All operations
* related to compute pipelines must take place inside of a compute pass. You
* must not begin another compute pass, or a render pass or copy pass before
* ending the compute pass.
*
* A VERY IMPORTANT NOTE Textures and buffers bound as write-only MUST NOT be
* read from during the compute pass. Doing so will result in undefined
* behavior. If your compute work requires reading the output from a previous
* A VERY IMPORTANT NOTE - Reads and writes in compute passes are NOT implicitly synchronized.
* This means you may cause data races by both reading and writing a resource region in a compute pass,
* or by writing multiple times to a resource region.
* If your compute work depends on reading the completed output from a previous
* dispatch, you MUST end the current compute pass and begin a new one before
* you can safely access the data.
* you can safely access the data. Otherwise you will receive unexpected results.
* Reading and writing a texture in the same compute pass is only supported by specific texture formats.
* Make sure you check the format support!
*
* \param command_buffer a command buffer.
* \param storage_texture_bindings an array of writeable storage texture
@ -2836,9 +2850,9 @@ extern SDL_DECLSPEC void SDLCALL SDL_EndGPURenderPass(
*/
extern SDL_DECLSPEC SDL_GPUComputePass *SDLCALL SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings);
/**

View file

@ -63,7 +63,7 @@ SDL_DYNAPI_PROC(SDL_TimerID,SDL_AddTimerNS,(Uint64 a, SDL_NSTimerCallback b, voi
SDL_DYNAPI_PROC(bool,SDL_AddVulkanRenderSemaphores,(SDL_Renderer *a, Uint32 b, Sint64 c, Sint64 d),(a,b,c,d),return)
SDL_DYNAPI_PROC(SDL_JoystickID,SDL_AttachVirtualJoystick,(const SDL_VirtualJoystickDesc *a),(a),return)
SDL_DYNAPI_PROC(bool,SDL_AudioDevicePaused,(SDL_AudioDeviceID a),(a),return)
SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureWriteOnlyBinding *b, Uint32 c, const SDL_GPUStorageBufferWriteOnlyBinding *d, Uint32 e),(a,b,c,d,e),return)
SDL_DYNAPI_PROC(SDL_GPUComputePass*,SDL_BeginGPUComputePass,(SDL_GPUCommandBuffer *a, const SDL_GPUStorageTextureReadWriteBinding *b, Uint32 c, const SDL_GPUStorageBufferReadWriteBinding *d, Uint32 e),(a,b,c,d,e),return)
SDL_DYNAPI_PROC(SDL_GPUCopyPass*,SDL_BeginGPUCopyPass,(SDL_GPUCommandBuffer *a),(a),return)
SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer *a, const SDL_GPUColorTargetInfo *b, Uint32 c, const SDL_GPUDepthStencilTargetInfo *d),(a,b,c,d),return)
SDL_DYNAPI_PROC(bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)

View file

@ -705,11 +705,11 @@ SDL_GPUComputePipeline *SDL_CreateGPUComputePipeline(
SDL_assert_release(!"Incompatible shader format for GPU backend");
return NULL;
}
if (createinfo->num_writeonly_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
if (createinfo->num_readwrite_storage_textures > MAX_COMPUTE_WRITE_TEXTURES) {
SDL_assert_release(!"Compute pipeline write-only texture count cannot be higher than 8!");
return NULL;
}
if (createinfo->num_writeonly_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
if (createinfo->num_readwrite_storage_buffers > MAX_COMPUTE_WRITE_BUFFERS) {
SDL_assert_release(!"Compute pipeline write-only buffer count cannot be higher than 8!");
return NULL;
}
@ -1868,9 +1868,9 @@ void SDL_EndGPURenderPass(
SDL_GPUComputePass *SDL_BeginGPUComputePass(
SDL_GPUCommandBuffer *command_buffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storage_texture_bindings,
const SDL_GPUStorageTextureReadWriteBinding *storage_texture_bindings,
Uint32 num_storage_texture_bindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storage_buffer_bindings,
const SDL_GPUStorageBufferReadWriteBinding *storage_buffer_bindings,
Uint32 num_storage_buffer_bindings)
{
CommandBufferCommonHeader *commandBufferHeader;
@ -1898,6 +1898,16 @@ SDL_GPUComputePass *SDL_BeginGPUComputePass(
if (COMMAND_BUFFER_DEVICE->debug_mode) {
CHECK_COMMAND_BUFFER_RETURN_NULL
CHECK_ANY_PASS_IN_PROGRESS("Cannot begin compute pass during another pass!", NULL)
for (Uint32 i = 0; i < num_storage_texture_bindings; i += 1) {
TextureCommonHeader *header = (TextureCommonHeader *)storage_texture_bindings[i].texture;
if (!(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) && !(header->info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
SDL_assert_release(!"Texture must be created with COMPUTE_STORAGE_WRITE or COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE flag");
return NULL;
}
}
// TODO: validate buffer usage?
}
COMMAND_BUFFER_DEVICE->BeginComputePass(

View file

@ -503,9 +503,9 @@ struct SDL_GPUDevice
void (*BeginComputePass)(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings);
void (*BindComputePipeline)(

View file

@ -529,9 +529,9 @@ typedef struct D3D11ComputePipeline
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numWriteonlyStorageTextures;
Uint32 numReadWriteStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageBuffers;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
} D3D11ComputePipeline;
@ -687,8 +687,8 @@ typedef struct D3D11CommandBuffer
D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
D3D11TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
D3D11Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
// Uniform buffers
D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@ -1524,9 +1524,9 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
pipeline->computeShader = shader;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
pipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
// thread counts are ignored in d3d11
@ -1891,7 +1891,8 @@ static D3D11Texture *D3D11_INTERNAL_CreateTexture(
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ) ||
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ);
needSubresourceUAV =
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE);
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
(createInfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
isMultisample = createInfo->sample_count > SDL_GPU_SAMPLECOUNT_1;
isStaging = createInfo->usage == 0;
isMippable =
@ -3224,8 +3225,8 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);
bool acquireFenceResult = D3D11_INTERNAL_AcquireFence(commandBuffer);
commandBuffer->autoReleaseFence = 1;
@ -4284,9 +4285,9 @@ static void D3D11_Blit(
static void D3D11_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
@ -4310,7 +4311,7 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
textureSubresource->parent);
d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource;
d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i] = textureSubresource;
}
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
@ -4325,15 +4326,15 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
buffer);
d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
d3d11CommandBuffer->computeReadWriteStorageBuffers[i] = buffer;
}
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav;
uavs[i] = d3d11CommandBuffer->computeReadWriteStorageTextureSubresources[i]->uav;
}
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav;
uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeReadWriteStorageBuffers[i]->uav;
}
ID3D11DeviceContext_CSSetUnorderedAccessViews(
@ -4622,8 +4623,8 @@ static void D3D11_EndComputePass(
SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(d3d11CommandBuffer->computeReadWriteStorageBuffers);
}
// Fence Cleanup
@ -5763,6 +5764,7 @@ static bool D3D11_SupportsTextureFormat(
DXGI_FORMAT dxgiFormat = SDLToD3D11_TextureFormat[format];
DXGI_FORMAT typelessFormat = D3D11_INTERNAL_GetTypelessFormat(dxgiFormat);
UINT formatSupport, sampleableFormatSupport;
D3D11_FEATURE_DATA_FORMAT_SUPPORT2 formatSupport2 = { dxgiFormat, 0 };
HRESULT res;
res = ID3D11Device_CheckFormatSupport(
@ -5787,6 +5789,19 @@ static bool D3D11_SupportsTextureFormat(
}
}
// Checks for SIMULTANEOUS_READ_WRITE support
if (usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
res = ID3D11Device_CheckFeatureSupport(
renderer->device,
D3D11_FEATURE_FORMAT_SUPPORT2,
&formatSupport2,
sizeof(formatSupport2));
if (FAILED(res)) {
// Format is apparently unknown
return false;
}
}
// Is the texture type supported?
if (type == SDL_GPU_TEXTURETYPE_2D && !(formatSupport & D3D11_FORMAT_SUPPORT_TEXTURE2D)) {
return false;
@ -5815,6 +5830,9 @@ static bool D3D11_SupportsTextureFormat(
// TYPED_UNORDERED_ACCESS_VIEW implies support for typed UAV stores
return false;
}
if ((usage & (SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) && !(formatSupport2.OutFormatSupport2 & D3D11_FORMAT_SUPPORT2_UAV_TYPED_LOAD))) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) && !(formatSupport & D3D11_FORMAT_SUPPORT_RENDER_TARGET)) {
return false;
}

View file

@ -732,10 +732,10 @@ struct D3D12CommandBuffer
D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 computeWriteOnlyStorageTextureSubresourceCount;
D3D12Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
Uint32 computeWriteOnlyStorageBufferCount;
D3D12TextureSubresource *computeReadWriteStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 computeReadWriteStorageTextureSubresourceCount;
D3D12Buffer *computeReadWriteStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
Uint32 computeReadWriteStorageBufferCount;
D3D12UniformBuffer *computeUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
// Resource tracking
@ -825,8 +825,8 @@ typedef struct D3D12ComputeRootSignature
Sint32 samplerTextureRootIndex;
Sint32 readOnlyStorageTextureRootIndex;
Sint32 readOnlyStorageBufferRootIndex;
Sint32 writeOnlyStorageTextureRootIndex;
Sint32 writeOnlyStorageBufferRootIndex;
Sint32 readWriteStorageTextureRootIndex;
Sint32 readWriteStorageBufferRootIndex;
Sint32 uniformBufferRootIndex[MAX_UNIFORM_BUFFERS_PER_STAGE];
} D3D12ComputeRootSignature;
@ -838,8 +838,8 @@ struct D3D12ComputePipeline
Uint32 numSamplers;
Uint32 numReadOnlyStorageTextures;
Uint32 numReadOnlyStorageBuffers;
Uint32 numWriteOnlyStorageTextures;
Uint32 numWriteOnlyStorageBuffers;
Uint32 numReadWriteStorageTextures;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
SDL_AtomicInt referenceCount;
@ -1521,13 +1521,17 @@ static void D3D12_INTERNAL_TextureSubresourceBarrier(
D3D12_RESOURCE_STATES destinationState,
D3D12TextureSubresource *textureSubresource)
{
bool needsUAVBarrier =
(textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
(textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
D3D12_INTERNAL_ResourceBarrier(
commandBuffer,
sourceState,
destinationState,
textureSubresource->parent->resource,
textureSubresource->index,
textureSubresource->parent->container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE);
needsUAVBarrier);
}
static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
@ -1547,6 +1551,8 @@ static D3D12_RESOURCE_STATES D3D12_INTERNAL_DefaultTextureResourceState(
return D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
} else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
} else if (usageFlags & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
return D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
} else {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Texture has no default usage mode!");
return D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
@ -2174,8 +2180,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
d3d12ComputeRootSignature->samplerTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1;
d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = -1;
d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = -1;
for (Uint32 i = 0; i < MAX_UNIFORM_BUFFERS_PER_STAGE; i += 1) {
d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1;
@ -2251,9 +2257,9 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
parameterCount += 1;
}
if (createInfo->num_writeonly_storage_textures) {
if (createInfo->num_readwrite_storage_textures) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_textures;
descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_textures;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
@ -2264,15 +2270,15 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = parameterCount;
d3d12ComputeRootSignature->readWriteStorageTextureRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
if (createInfo->num_writeonly_storage_buffers) {
if (createInfo->num_readwrite_storage_buffers) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
descriptorRange.NumDescriptors = createInfo->num_writeonly_storage_buffers;
descriptorRange.BaseShaderRegister = createInfo->num_writeonly_storage_textures;
descriptorRange.NumDescriptors = createInfo->num_readwrite_storage_buffers;
descriptorRange.BaseShaderRegister = createInfo->num_readwrite_storage_textures;
descriptorRange.RegisterSpace = 1;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
@ -2282,7 +2288,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->writeOnlyStorageBufferRootIndex = parameterCount;
d3d12ComputeRootSignature->readWriteStorageBufferRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
@ -2409,8 +2415,8 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline(
computePipeline->numSamplers = createinfo->num_samplers;
computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures;
computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers;
computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures;
computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
computePipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
computePipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
computePipeline->numUniformBuffers = createinfo->num_uniform_buffers;
SDL_SetAtomicInt(&computePipeline->referenceCount, 0);
@ -2792,6 +2798,9 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
D3D12_RESOURCE_STATES initialState = (D3D12_RESOURCE_STATES)0;
D3D12_CLEAR_VALUE clearValue;
bool useClearValue = false;
bool needsUAV =
(createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) ||
(createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE);
HRESULT res;
texture = (D3D12Texture *)SDL_calloc(1, sizeof(D3D12Texture));
@ -2819,7 +2828,7 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
clearValue.DepthStencil.Stencil = (UINT8)SDL_GetNumberProperty(createinfo->props, SDL_PROP_GPU_CREATETEXTURE_D3D12_CLEAR_STENCIL_UINT8, 0);
}
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if (needsUAV) {
resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
}
@ -3025,7 +3034,7 @@ static D3D12Texture *D3D12_INTERNAL_CreateTexture(
}
// Create subresource UAV if necessary
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if (needsUAV) {
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc;
D3D12_INTERNAL_AssignCpuDescriptorHandle(
@ -4877,26 +4886,23 @@ static void D3D12_EndRenderPass(
static void D3D12_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount = numStorageTextureBindings;
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount = numStorageBufferBindings;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount = numStorageTextureBindings;
d3d12CommandBuffer->computeReadWriteStorageBufferCount = numStorageBufferBindings;
/* Write-only resources will be actually bound in BindComputePipeline
/* Read-write resources will actually be bound in BindComputePipeline
* after the root signature is set.
* We also have to scan to see which barriers we actually need because depth slices aren't separate subresources
*/
if (numStorageTextureBindings > 0) {
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
D3D12TextureContainer *container = (D3D12TextureContainer *)storageTextureBindings[i].texture;
if (!(container->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
}
D3D12TextureSubresource *subresource = D3D12_INTERNAL_PrepareTextureSubresourceForWrite(
d3d12CommandBuffer,
@ -4906,7 +4912,7 @@ static void D3D12_BeginComputePass(
storageTextureBindings[i].cycle,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = subresource;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i] = subresource;
D3D12_INTERNAL_TrackTexture(
d3d12CommandBuffer,
@ -4917,16 +4923,14 @@ static void D3D12_BeginComputePass(
if (numStorageBufferBindings > 0) {
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
D3D12BufferContainer *container = (D3D12BufferContainer *)storageBufferBindings[i].buffer;
if (!(container->usage & SDL_GPU_BUFFERUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
}
D3D12Buffer *buffer = D3D12_INTERNAL_PrepareBufferForWrite(
d3d12CommandBuffer,
container,
storageBufferBindings[i].cycle,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
d3d12CommandBuffer->computeReadWriteStorageBuffers[i] = buffer;
D3D12_INTERNAL_TrackBuffer(
d3d12CommandBuffer,
@ -4972,39 +4976,39 @@ static void D3D12_BindComputePipeline(
D3D12_INTERNAL_TrackComputePipeline(d3d12CommandBuffer, pipeline);
// Bind read-write resources after setting root signature
if (pipeline->numWriteOnlyStorageTextures > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageTextures; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uavHandle.cpuHandle;
if (pipeline->numReadWriteStorageTextures > 0) {
for (Uint32 i = 0; i < pipeline->numReadWriteStorageTextures; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]->uavHandle.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
d3d12CommandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount,
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
d3d12CommandBuffer->graphicsCommandList,
d3d12CommandBuffer->currentComputePipeline->rootSignature->writeOnlyStorageTextureRootIndex,
d3d12CommandBuffer->currentComputePipeline->rootSignature->readWriteStorageTextureRootIndex,
gpuDescriptorHandle);
}
if (pipeline->numWriteOnlyStorageBuffers > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageBuffers; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]->uavDescriptor.cpuHandle;
if (pipeline->numReadWriteStorageBuffers > 0) {
for (Uint32 i = 0; i < pipeline->numReadWriteStorageBuffers; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeReadWriteStorageBuffers[i]->uavDescriptor.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
d3d12CommandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount,
d3d12CommandBuffer->computeReadWriteStorageBufferCount,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
d3d12CommandBuffer->graphicsCommandList,
d3d12CommandBuffer->currentComputePipeline->rootSignature->writeOnlyStorageBufferRootIndex,
d3d12CommandBuffer->currentComputePipeline->rootSignature->readWriteStorageBufferRootIndex,
gpuDescriptorHandle);
}
}
@ -5257,29 +5261,29 @@ static void D3D12_EndComputePass(
{
D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
for (Uint32 i = 0; i < d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount; i += 1) {
if (d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]) {
for (Uint32 i = 0; i < d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount; i += 1) {
if (d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]) {
D3D12_INTERNAL_TextureSubresourceTransitionToDefaultUsage(
d3d12CommandBuffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]);
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i]);
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = NULL;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresources[i] = NULL;
}
}
d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresourceCount = 0;
d3d12CommandBuffer->computeReadWriteStorageTextureSubresourceCount = 0;
for (Uint32 i = 0; i < d3d12CommandBuffer->computeWriteOnlyStorageBufferCount; i += 1) {
if (d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]) {
for (Uint32 i = 0; i < d3d12CommandBuffer->computeReadWriteStorageBufferCount; i += 1) {
if (d3d12CommandBuffer->computeReadWriteStorageBuffers[i]) {
D3D12_INTERNAL_BufferTransitionToDefaultUsage(
d3d12CommandBuffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]);
d3d12CommandBuffer->computeReadWriteStorageBuffers[i]);
d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i] = NULL;
d3d12CommandBuffer->computeReadWriteStorageBuffers[i] = NULL;
}
}
d3d12CommandBuffer->computeWriteOnlyStorageBufferCount = 0;
d3d12CommandBuffer->computeReadWriteStorageBufferCount = 0;
for (Uint32 i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) {
if (d3d12CommandBuffer->computeReadOnlyStorageTextures[i]) {
@ -6881,8 +6885,8 @@ static SDL_GPUCommandBuffer *D3D12_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers);
SDL_zeroa(commandBuffer->computeUniformBuffers);
commandBuffer->autoReleaseFence = true;
@ -7555,6 +7559,9 @@ static bool D3D12_SupportsTextureFormat(
if ((usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) && !(formatSupport.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE)) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) && !(formatSupport.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD)) {
return false;
}
if ((usage & SDL_GPU_TEXTUREUSAGE_COLOR_TARGET) && !(formatSupport.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET)) {
return false;
}

View file

@ -453,9 +453,9 @@ typedef struct MetalComputePipeline
id<MTLComputePipelineState> handle;
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numWriteonlyStorageTextures;
Uint32 numReadWriteStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageBuffers;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
Uint32 threadcountX;
Uint32 threadcountY;
@ -548,8 +548,8 @@ typedef struct MetalCommandBuffer
id<MTLSamplerState> computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
id<MTLTexture> computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
id<MTLBuffer> computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
id<MTLTexture> computeWriteOnlyTextures[MAX_COMPUTE_WRITE_TEXTURES];
id<MTLBuffer> computeWriteOnlyBuffers[MAX_COMPUTE_WRITE_BUFFERS];
id<MTLTexture> computeReadWriteTextures[MAX_COMPUTE_WRITE_TEXTURES];
id<MTLBuffer> computeReadWriteBuffers[MAX_COMPUTE_WRITE_BUFFERS];
// Uniform buffers
MetalUniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@ -999,9 +999,9 @@ static SDL_GPUComputePipeline *METAL_CreateComputePipeline(
pipeline->handle = handle;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipeline->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
pipeline->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
pipeline->threadcountX = createinfo->threadcount_x;
pipeline->threadcountY = createinfo->threadcount_y;
@ -1380,7 +1380,8 @@ static MetalTexture *METAL_INTERNAL_CreateTexture(
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ)) {
textureDescriptor.usage |= MTLTextureUsageShaderRead;
}
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if (createinfo->usage & (SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
textureDescriptor.usage |= MTLTextureUsageShaderWrite;
}
@ -2653,11 +2654,11 @@ static void METAL_INTERNAL_BindComputeResources(
}
// Bind read-write textures
if (computePipeline->numWriteonlyStorageTextures > 0) {
[commandBuffer->computeEncoder setTextures:commandBuffer->computeWriteOnlyTextures
if (computePipeline->numReadWriteStorageTextures > 0) {
[commandBuffer->computeEncoder setTextures:commandBuffer->computeReadWriteTextures
withRange:NSMakeRange(
computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures,
computePipeline->numWriteonlyStorageTextures)];
computePipeline->numReadWriteStorageTextures)];
}
commandBuffer->needComputeTextureBind = false;
}
@ -2671,13 +2672,13 @@ static void METAL_INTERNAL_BindComputeResources(
computePipeline->numReadonlyStorageBuffers)];
}
// Bind read-write buffers
if (computePipeline->numWriteonlyStorageBuffers > 0) {
[commandBuffer->computeEncoder setBuffers:commandBuffer->computeWriteOnlyBuffers
if (computePipeline->numReadWriteStorageBuffers > 0) {
[commandBuffer->computeEncoder setBuffers:commandBuffer->computeReadWriteBuffers
offsets:offsets
withRange:NSMakeRange(
computePipeline->numUniformBuffers +
computePipeline->numReadonlyStorageBuffers,
computePipeline->numWriteonlyStorageBuffers)];
computePipeline->numReadWriteStorageBuffers)];
}
commandBuffer->needComputeBufferBind = false;
}
@ -2960,9 +2961,9 @@ static void METAL_Blit(
static void METAL_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
@autoreleasepool {
@ -2990,7 +2991,7 @@ static void METAL_BeginComputePass(
levels:NSMakeRange(storageTextureBindings[i].mip_level, 1)
slices:NSMakeRange(storageTextureBindings[i].layer, 1)];
metalCommandBuffer->computeWriteOnlyTextures[i] = textureView;
metalCommandBuffer->computeReadWriteTextures[i] = textureView;
metalCommandBuffer->needComputeTextureBind = true;
}
@ -3006,7 +3007,7 @@ static void METAL_BeginComputePass(
metalCommandBuffer,
buffer);
metalCommandBuffer->computeWriteOnlyBuffers[i] = buffer->handle;
metalCommandBuffer->computeReadWriteBuffers[i] = buffer->handle;
metalCommandBuffer->needComputeBufferBind = true;
}
}
@ -3182,10 +3183,10 @@ static void METAL_EndComputePass(
metalCommandBuffer->computeSamplerTextures[i] = nil;
}
for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) {
metalCommandBuffer->computeWriteOnlyTextures[i] = nil;
metalCommandBuffer->computeReadWriteTextures[i] = nil;
}
for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_BUFFERS; i += 1) {
metalCommandBuffer->computeWriteOnlyBuffers[i] = nil;
metalCommandBuffer->computeReadWriteBuffers[i] = nil;
}
for (Uint32 i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) {
metalCommandBuffer->computeReadOnlyTextures[i] = nil;
@ -3282,10 +3283,10 @@ static void METAL_INTERNAL_CleanCommandBuffer(
commandBuffer->computeReadOnlyBuffers[i] = nil;
}
for (i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) {
commandBuffer->computeWriteOnlyTextures[i] = nil;
commandBuffer->computeReadWriteTextures[i] = nil;
}
for (i = 0; i < MAX_COMPUTE_WRITE_BUFFERS; i += 1) {
commandBuffer->computeWriteOnlyBuffers[i] = nil;
commandBuffer->computeReadWriteBuffers[i] = nil;
}
// The fence is now available (unless SubmitAndAcquireFence was called)
@ -3780,6 +3781,7 @@ static bool METAL_Wait(
// Format Info
// FIXME: Check simultaneous read-write support
static bool METAL_SupportsTextureFormat(
SDL_GPURenderer *driverData,
SDL_GPUTextureFormat format,

View file

@ -829,8 +829,8 @@ typedef struct ComputePipelineResourceLayoutHashTableKey
Uint32 samplerCount;
Uint32 readonlyStorageTextureCount;
Uint32 readonlyStorageBufferCount;
Uint32 writeonlyStorageTextureCount;
Uint32 writeonlyStorageBufferCount;
Uint32 readWriteStorageTextureCount;
Uint32 readWriteStorageBufferCount;
Uint32 uniformBufferCount;
} ComputePipelineResourceLayoutHashTableKey;
@ -849,8 +849,8 @@ typedef struct VulkanComputePipelineResourceLayout
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageTextures;
Uint32 numWriteonlyStorageBuffers;
Uint32 numReadWriteStorageTextures;
Uint32 numReadWriteStorageBuffers;
Uint32 numUniformBuffers;
} VulkanComputePipelineResourceLayout;
@ -971,7 +971,7 @@ typedef struct VulkanCommandBuffer
bool needNewFragmentUniformOffsets;
bool needNewComputeReadOnlyDescriptorSet;
bool needNewComputeWriteOnlyDescriptorSet;
bool needNewComputeReadWriteDescriptorSet;
bool needNewComputeUniformDescriptorSet;
bool needNewComputeUniformOffsets;
@ -981,7 +981,7 @@ typedef struct VulkanCommandBuffer
VkDescriptorSet fragmentUniformDescriptorSet;
VkDescriptorSet computeReadOnlyDescriptorSet;
VkDescriptorSet computeWriteOnlyDescriptorSet;
VkDescriptorSet computeReadWriteDescriptorSet;
VkDescriptorSet computeUniformDescriptorSet;
VulkanTexture *vertexSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
@ -994,9 +994,9 @@ typedef struct VulkanCommandBuffer
VulkanTexture *fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
VulkanBuffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
VulkanTextureSubresource *writeOnlyComputeStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 writeOnlyComputeStorageTextureSubresourceCount;
VulkanBuffer *writeOnlyComputeStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
VulkanTextureSubresource *readWriteComputeStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
Uint32 readWriteComputeStorageTextureSubresourceCount;
VulkanBuffer *readWriteComputeStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
VulkanTexture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
VulkanSampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
@ -2723,6 +2723,8 @@ static VulkanTextureUsageMode VULKAN_INTERNAL_DefaultTextureUsageMode(
return VULKAN_TEXTURE_USAGE_MODE_COMPUTE_STORAGE_READ;
} else if (texture->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
return VULKAN_TEXTURE_USAGE_MODE_COMPUTE_STORAGE_READ_WRITE;
} else if (texture->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE) {
return VULKAN_TEXTURE_USAGE_MODE_COMPUTE_STORAGE_READ_WRITE;
} else {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Texture has no default usage mode!");
return VULKAN_TEXTURE_USAGE_MODE_SAMPLER;
@ -3235,8 +3237,8 @@ static Uint32 VULKAN_INTERNAL_ComputePipelineResourceLayoutHashFunction(const vo
result = result * hashFactor + hashTableKey->samplerCount;
result = result * hashFactor + hashTableKey->readonlyStorageTextureCount;
result = result * hashFactor + hashTableKey->readonlyStorageBufferCount;
result = result * hashFactor + hashTableKey->writeonlyStorageTextureCount;
result = result * hashFactor + hashTableKey->writeonlyStorageBufferCount;
result = result * hashFactor + hashTableKey->readWriteStorageTextureCount;
result = result * hashFactor + hashTableKey->readWriteStorageBufferCount;
result = result * hashFactor + hashTableKey->uniformBufferCount;
return result;
}
@ -3863,8 +3865,8 @@ static VulkanComputePipelineResourceLayout *VULKAN_INTERNAL_FetchComputePipeline
key.samplerCount = createinfo->num_samplers;
key.readonlyStorageTextureCount = createinfo->num_readonly_storage_textures;
key.readonlyStorageBufferCount = createinfo->num_readonly_storage_buffers;
key.writeonlyStorageTextureCount = createinfo->num_writeonly_storage_textures;
key.writeonlyStorageBufferCount = createinfo->num_writeonly_storage_buffers;
key.readWriteStorageTextureCount = createinfo->num_readwrite_storage_textures;
key.readWriteStorageBufferCount = createinfo->num_readwrite_storage_buffers;
key.uniformBufferCount = createinfo->num_uniform_buffers;
if (SDL_FindInHashTable(
@ -3896,8 +3898,8 @@ static VulkanComputePipelineResourceLayout *VULKAN_INTERNAL_FetchComputePipeline
0,
0,
0,
createinfo->num_writeonly_storage_textures,
createinfo->num_writeonly_storage_buffers,
createinfo->num_readwrite_storage_textures,
createinfo->num_readwrite_storage_buffers,
0);
pipelineResourceLayout->descriptorSetLayouts[2] = VULKAN_INTERNAL_FetchDescriptorSetLayout(
@ -3917,8 +3919,8 @@ static VulkanComputePipelineResourceLayout *VULKAN_INTERNAL_FetchComputePipeline
pipelineResourceLayout->numSamplers = createinfo->num_samplers;
pipelineResourceLayout->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipelineResourceLayout->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipelineResourceLayout->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipelineResourceLayout->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
pipelineResourceLayout->numReadWriteStorageTextures = createinfo->num_readwrite_storage_textures;
pipelineResourceLayout->numReadWriteStorageBuffers = createinfo->num_readwrite_storage_buffers;
pipelineResourceLayout->numUniformBuffers = createinfo->num_uniform_buffers;
// Create the pipeline layout
@ -5476,7 +5478,8 @@ static VulkanTexture *VULKAN_INTERNAL_CreateTexture(
}
if (createinfo->usage & (SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
vkUsageFlags |= VK_IMAGE_USAGE_STORAGE_BIT;
}
@ -5610,7 +5613,7 @@ static VulkanTexture *VULKAN_INTERNAL_CreateTexture(
}
}
if (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) {
if ((createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE) || (createinfo->usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
if (!VULKAN_INTERNAL_CreateSubresourceView(
renderer,
createinfo,
@ -7815,9 +7818,9 @@ static void VULKAN_EndRenderPass(
static void VULKAN_BeginComputePass(
SDL_GPUCommandBuffer *commandBuffer,
const SDL_GPUStorageTextureWriteOnlyBinding *storageTextureBindings,
const SDL_GPUStorageTextureReadWriteBinding *storageTextureBindings,
Uint32 numStorageTextureBindings,
const SDL_GPUStorageBufferWriteOnlyBinding *storageBufferBindings,
const SDL_GPUStorageBufferReadWriteBinding *storageBufferBindings,
Uint32 numStorageBufferBindings)
{
VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer;
@ -7826,7 +7829,7 @@ static void VULKAN_BeginComputePass(
VulkanBuffer *buffer;
Uint32 i;
vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresourceCount = numStorageTextureBindings;
vulkanCommandBuffer->readWriteComputeStorageTextureSubresourceCount = numStorageTextureBindings;
for (i = 0; i < numStorageTextureBindings; i += 1) {
VulkanTextureContainer *textureContainer = (VulkanTextureContainer *)storageTextureBindings[i].texture;
@ -7839,7 +7842,7 @@ static void VULKAN_BeginComputePass(
storageTextureBindings[i].cycle,
VULKAN_TEXTURE_USAGE_MODE_COMPUTE_STORAGE_READ_WRITE);
vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresources[i] = subresource;
vulkanCommandBuffer->readWriteComputeStorageTextureSubresources[i] = subresource;
VULKAN_INTERNAL_TrackTexture(
vulkanCommandBuffer,
@ -7855,7 +7858,7 @@ static void VULKAN_BeginComputePass(
storageBufferBindings[i].cycle,
VULKAN_BUFFER_USAGE_MODE_COMPUTE_STORAGE_READ);
vulkanCommandBuffer->writeOnlyComputeStorageBuffers[i] = buffer;
vulkanCommandBuffer->readWriteComputeStorageBuffers[i] = buffer;
VULKAN_INTERNAL_TrackBuffer(
vulkanCommandBuffer,
@ -7889,7 +7892,7 @@ static void VULKAN_BindComputePipeline(
}
// Mark binding as needed
vulkanCommandBuffer->needNewComputeWriteOnlyDescriptorSet = true;
vulkanCommandBuffer->needNewComputeReadWriteDescriptorSet = true;
vulkanCommandBuffer->needNewComputeReadOnlyDescriptorSet = true;
vulkanCommandBuffer->needNewComputeUniformDescriptorSet = true;
vulkanCommandBuffer->needNewComputeUniformOffsets = true;
@ -8034,7 +8037,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
if (
!commandBuffer->needNewComputeReadOnlyDescriptorSet &&
!commandBuffer->needNewComputeWriteOnlyDescriptorSet &&
!commandBuffer->needNewComputeReadWriteDescriptorSet &&
!commandBuffer->needNewComputeUniformDescriptorSet &&
!commandBuffer->needNewComputeUniformOffsets
) {
@ -8123,15 +8126,15 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
commandBuffer->needNewComputeReadOnlyDescriptorSet = false;
}
if (commandBuffer->needNewComputeWriteOnlyDescriptorSet) {
if (commandBuffer->needNewComputeReadWriteDescriptorSet) {
descriptorSetLayout = resourceLayout->descriptorSetLayouts[1];
commandBuffer->computeWriteOnlyDescriptorSet = VULKAN_INTERNAL_FetchDescriptorSet(
commandBuffer->computeReadWriteDescriptorSet = VULKAN_INTERNAL_FetchDescriptorSet(
renderer,
commandBuffer,
descriptorSetLayout);
for (Uint32 i = 0; i < resourceLayout->numWriteonlyStorageTextures; i += 1) {
for (Uint32 i = 0; i < resourceLayout->numReadWriteStorageTextures; i += 1) {
VkWriteDescriptorSet *currentWriteDescriptorSet = &writeDescriptorSets[writeCount];
currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -8140,12 +8143,12 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
currentWriteDescriptorSet->dstArrayElement = 0;
currentWriteDescriptorSet->dstBinding = i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeWriteOnlyDescriptorSet;
currentWriteDescriptorSet->dstSet = commandBuffer->computeReadWriteDescriptorSet;
currentWriteDescriptorSet->pTexelBufferView = NULL;
currentWriteDescriptorSet->pBufferInfo = NULL;
imageInfos[imageInfoCount].sampler = VK_NULL_HANDLE;
imageInfos[imageInfoCount].imageView = commandBuffer->writeOnlyComputeStorageTextureSubresources[i]->computeWriteView;
imageInfos[imageInfoCount].imageView = commandBuffer->readWriteComputeStorageTextureSubresources[i]->computeWriteView;
imageInfos[imageInfoCount].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
currentWriteDescriptorSet->pImageInfo = &imageInfos[imageInfoCount];
@ -8154,7 +8157,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
imageInfoCount += 1;
}
for (Uint32 i = 0; i < resourceLayout->numWriteonlyStorageBuffers; i += 1) {
for (Uint32 i = 0; i < resourceLayout->numReadWriteStorageBuffers; i += 1) {
VkWriteDescriptorSet *currentWriteDescriptorSet = &writeDescriptorSets[writeCount];
currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -8162,12 +8165,12 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
currentWriteDescriptorSet->descriptorCount = 1;
currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
currentWriteDescriptorSet->dstArrayElement = 0;
currentWriteDescriptorSet->dstBinding = resourceLayout->numWriteonlyStorageTextures + i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeWriteOnlyDescriptorSet;
currentWriteDescriptorSet->dstBinding = resourceLayout->numReadWriteStorageTextures + i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeReadWriteDescriptorSet;
currentWriteDescriptorSet->pTexelBufferView = NULL;
currentWriteDescriptorSet->pImageInfo = NULL;
bufferInfos[bufferInfoCount].buffer = commandBuffer->writeOnlyComputeStorageBuffers[i]->buffer;
bufferInfos[bufferInfoCount].buffer = commandBuffer->readWriteComputeStorageBuffers[i]->buffer;
bufferInfos[bufferInfoCount].offset = 0;
bufferInfos[bufferInfoCount].range = VK_WHOLE_SIZE;
@ -8177,7 +8180,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
bufferInfoCount += 1;
}
commandBuffer->needNewComputeWriteOnlyDescriptorSet = false;
commandBuffer->needNewComputeReadWriteDescriptorSet = false;
}
if (commandBuffer->needNewComputeUniformDescriptorSet) {
@ -8229,7 +8232,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
VkDescriptorSet sets[3];
sets[0] = commandBuffer->computeReadOnlyDescriptorSet;
sets[1] = commandBuffer->computeWriteOnlyDescriptorSet;
sets[1] = commandBuffer->computeReadWriteDescriptorSet;
sets[2] = commandBuffer->computeUniformDescriptorSet;
renderer->vkCmdBindDescriptorSets(
@ -8288,25 +8291,25 @@ static void VULKAN_EndComputePass(
VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer;
Uint32 i;
for (i = 0; i < vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresourceCount; i += 1) {
for (i = 0; i < vulkanCommandBuffer->readWriteComputeStorageTextureSubresourceCount; i += 1) {
VULKAN_INTERNAL_TextureSubresourceTransitionToDefaultUsage(
vulkanCommandBuffer->renderer,
vulkanCommandBuffer,
VULKAN_TEXTURE_USAGE_MODE_COMPUTE_STORAGE_READ_WRITE,
vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresources[i]);
vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresources[i] = NULL;
vulkanCommandBuffer->readWriteComputeStorageTextureSubresources[i]);
vulkanCommandBuffer->readWriteComputeStorageTextureSubresources[i] = NULL;
}
vulkanCommandBuffer->writeOnlyComputeStorageTextureSubresourceCount = 0;
vulkanCommandBuffer->readWriteComputeStorageTextureSubresourceCount = 0;
for (i = 0; i < MAX_COMPUTE_WRITE_BUFFERS; i += 1) {
if (vulkanCommandBuffer->writeOnlyComputeStorageBuffers[i] != NULL) {
if (vulkanCommandBuffer->readWriteComputeStorageBuffers[i] != NULL) {
VULKAN_INTERNAL_BufferTransitionToDefaultUsage(
vulkanCommandBuffer->renderer,
vulkanCommandBuffer,
VULKAN_BUFFER_USAGE_MODE_COMPUTE_STORAGE_READ_WRITE,
vulkanCommandBuffer->writeOnlyComputeStorageBuffers[i]);
vulkanCommandBuffer->readWriteComputeStorageBuffers[i]);
vulkanCommandBuffer->writeOnlyComputeStorageBuffers[i] = NULL;
vulkanCommandBuffer->readWriteComputeStorageBuffers[i] = NULL;
}
}
@ -8341,7 +8344,7 @@ static void VULKAN_EndComputePass(
vulkanCommandBuffer->currentComputePipeline = NULL;
vulkanCommandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE;
vulkanCommandBuffer->computeWriteOnlyDescriptorSet = VK_NULL_HANDLE;
vulkanCommandBuffer->computeReadWriteDescriptorSet = VK_NULL_HANDLE;
vulkanCommandBuffer->computeUniformDescriptorSet = VK_NULL_HANDLE;
}
@ -8989,7 +8992,7 @@ static bool VULKAN_INTERNAL_AllocateCommandBuffer(
commandBuffer->needNewFragmentUniformDescriptorSet = true;
commandBuffer->needNewFragmentUniformOffsets = true;
commandBuffer->needNewComputeWriteOnlyDescriptorSet = true;
commandBuffer->needNewComputeReadWriteDescriptorSet = true;
commandBuffer->needNewComputeReadOnlyDescriptorSet = true;
commandBuffer->needNewComputeUniformDescriptorSet = true;
commandBuffer->needNewComputeUniformOffsets = true;
@ -9000,7 +9003,7 @@ static bool VULKAN_INTERNAL_AllocateCommandBuffer(
commandBuffer->fragmentUniformDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeWriteOnlyDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeReadWriteDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeUniformDescriptorSet = VK_NULL_HANDLE;
// Resource tracking
@ -9192,7 +9195,7 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer(
commandBuffer->fragmentUniformDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeWriteOnlyDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeReadWriteDescriptorSet = VK_NULL_HANDLE;
commandBuffer->computeUniformDescriptorSet = VK_NULL_HANDLE;
SDL_zeroa(commandBuffer->vertexSamplerTextures);
@ -9205,9 +9208,9 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->fragmentStorageTextures);
SDL_zeroa(commandBuffer->fragmentStorageBuffers);
SDL_zeroa(commandBuffer->writeOnlyComputeStorageTextureSubresources);
commandBuffer->writeOnlyComputeStorageTextureSubresourceCount = 0;
SDL_zeroa(commandBuffer->writeOnlyComputeStorageBuffers);
SDL_zeroa(commandBuffer->readWriteComputeStorageTextureSubresources);
commandBuffer->readWriteComputeStorageTextureSubresourceCount = 0;
SDL_zeroa(commandBuffer->readWriteComputeStorageBuffers);
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->readOnlyComputeStorageTextures);
@ -10446,7 +10449,8 @@ static bool VULKAN_SupportsTextureFormat(
}
if (usage & (SDL_GPU_TEXTUREUSAGE_GRAPHICS_STORAGE_READ |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_READ |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE |
SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_SIMULTANEOUS_READ_WRITE)) {
vulkanUsage |= VK_IMAGE_USAGE_STORAGE_BIT;
}