diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index e6dbdf52c7..df69c0f0d6 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -1491,6 +1491,7 @@ typedef struct SDL_GPUComputePipelineCreateInfo const Uint8 *code; /**< A pointer to compute shader code. */ const char *entrypoint; /**< A pointer to a null-terminated UTF-8 string specifying the entry point function name for the shader. */ SDL_GPUShaderFormat format; /**< The format of the compute shader code. */ + Uint32 num_samplers; /**< The number of samplers defined in the shader. */ Uint32 num_readonly_storage_textures; /**< The number of readonly storage textures defined in the shader. */ Uint32 num_readonly_storage_buffers; /**< The number of readonly storage buffers defined in the shader. */ Uint32 num_writeonly_storage_textures; /**< The number of writeonly storage textures defined in the shader. */ @@ -1791,13 +1792,13 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device * * For SPIR-V shaders, use the following resource sets: * - * - 0: Read-only storage textures, followed by read-only storage buffers + * - 0: Sampled textures, followed by read-only storage textures, followed by read-only storage buffers * - 1: Write-only storage textures, followed by write-only storage buffers * - 2: Uniform buffers * * For DXBC Shader Model 5_0 shaders, use the following register order: * - * - t registers: Read-only storage textures, followed by read-only storage + * - t registers: Sampled textures, followed by read-only storage textures, followed by read-only storage * buffers * - u registers: Write-only storage textures, followed by write-only storage * buffers @@ -1805,7 +1806,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device * * For DXIL shaders, use the following register order: * - * - (t[n], space0): Read-only storage textures, followed by read-only storage + * - (t[n], space0): Sampled textures, followed by read-only storage textures, followed by read-only storage * buffers * - (u[n], space1): Write-only storage textures, followed by write-only * storage buffers @@ -1815,7 +1816,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device * * - [[buffer]]: Uniform buffers, followed by write-only storage buffers, * followed by write-only storage buffers - * - [[texture]]: Read-only storage textures, followed by write-only storage + * - [[texture]]: Sampled textures, followed by read-only storage textures, followed by write-only storage * textures * * \param device a GPU Context. @@ -2757,6 +2758,24 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline( SDL_GPUComputePass *compute_pass, SDL_GPUComputePipeline *compute_pipeline); +/** + * Binds texture-sampler pairs for use on the compute shader. + * + * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER. + * + * \param compute_pass a compute pass handle. + * \param first_slot the compute sampler slot to begin binding from. + * \param texture_sampler_bindings an array of texture-sampler binding structs. + * \param num_bindings the number of texture-sampler bindings to bind from the array. + * + * \since This function is available since SDL 3.0.0 + */ +extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers( + SDL_GPUComputePass *compute_pass, + Uint32 first_slot, + const SDL_GPUTextureSamplerBinding *texture_sampler_bindings, + Uint32 num_bindings); + /** * Binds storage textures as readonly for use on the compute pipeline. * diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index 1b9d1b280f..8b60ab7b75 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -29,6 +29,7 @@ SDL3_0.0.0 { SDL_BindAudioStream; SDL_BindAudioStreams; SDL_BindGPUComputePipeline; + SDL_BindGPUComputeSamplers; SDL_BindGPUComputeStorageBuffers; SDL_BindGPUComputeStorageTextures; SDL_BindGPUFragmentSamplers; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index 99d95a43b2..2a25777781 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -54,6 +54,7 @@ #define SDL_BindAudioStream SDL_BindAudioStream_REAL #define SDL_BindAudioStreams SDL_BindAudioStreams_REAL #define SDL_BindGPUComputePipeline SDL_BindGPUComputePipeline_REAL +#define SDL_BindGPUComputeSamplers SDL_BindGPUComputeSamplers_REAL #define SDL_BindGPUComputeStorageBuffers SDL_BindGPUComputeStorageBuffers_REAL #define SDL_BindGPUComputeStorageTextures SDL_BindGPUComputeStorageTextures_REAL #define SDL_BindGPUFragmentSamplers SDL_BindGPUFragmentSamplers_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index 3a60c9f34c..e66887bfe7 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -74,6 +74,7 @@ SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return) SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStreams,(SDL_AudioDeviceID a, SDL_AudioStream **b, int c),(a,b,c),return) SDL_DYNAPI_PROC(void,SDL_BindGPUComputePipeline,(SDL_GPUComputePass *a, SDL_GPUComputePipeline *b),(a,b),) +SDL_DYNAPI_PROC(void,SDL_BindGPUComputeSamplers,(SDL_GPUComputePass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),) SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageBuffers,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUBuffer *const *c, Uint32 d),(a,b,c,d),) SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageTextures,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUTexture *const *c, Uint32 d),(a,b,c,d),) SDL_DYNAPI_PROC(void,SDL_BindGPUFragmentSamplers,(SDL_GPURenderPass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 7e2396d92b..344794a260 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -1782,6 +1782,32 @@ void SDL_BindGPUComputePipeline( commandBufferHeader->compute_pipeline_bound = true; } +void SDL_BindGPUComputeSamplers( + SDL_GPUComputePass *compute_pass, + Uint32 first_slot, + const SDL_GPUTextureSamplerBinding *texture_sampler_bindings, + Uint32 num_bindings) +{ + if (compute_pass == NULL) { + SDL_InvalidParamError("compute_pass"); + return; + } + if (texture_sampler_bindings == NULL && num_bindings > 0) { + SDL_InvalidParamError("texture_sampler_bindings"); + return; + } + + if (COMPUTEPASS_DEVICE->debug_mode) { + CHECK_COMPUTEPASS + } + + COMPUTEPASS_DEVICE->BindComputeSamplers( + COMPUTEPASS_COMMAND_BUFFER, + first_slot, + texture_sampler_bindings, + num_bindings); +} + void SDL_BindGPUComputeStorageTextures( SDL_GPUComputePass *compute_pass, Uint32 first_slot, diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 1706071dc2..93a904cd35 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -511,6 +511,12 @@ struct SDL_GPUDevice SDL_GPUCommandBuffer *commandBuffer, SDL_GPUComputePipeline *computePipeline); + void (*BindComputeSamplers)( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings); + void (*BindComputeStorageTextures)( SDL_GPUCommandBuffer *commandBuffer, Uint32 firstSlot, @@ -740,6 +746,7 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(EndRenderPass, name) \ ASSIGN_DRIVER_FUNC(BeginComputePass, name) \ ASSIGN_DRIVER_FUNC(BindComputePipeline, name) \ + ASSIGN_DRIVER_FUNC(BindComputeSamplers, name) \ ASSIGN_DRIVER_FUNC(BindComputeStorageTextures, name) \ ASSIGN_DRIVER_FUNC(BindComputeStorageBuffers, name) \ ASSIGN_DRIVER_FUNC(PushComputeUniformData, name) \ diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c index 711688ae3b..c0cfede639 100644 --- a/src/gpu/d3d11/SDL_gpu_d3d11.c +++ b/src/gpu/d3d11/SDL_gpu_d3d11.c @@ -414,13 +414,13 @@ typedef struct D3D11TextureContainer TextureCommonHeader header; D3D11Texture *activeTexture; - bool canBeCycled; Uint32 textureCapacity; Uint32 textureCount; D3D11Texture **textures; char *debugName; + bool canBeCycled; } D3D11TextureContainer; typedef struct D3D11TextureSubresource @@ -523,6 +523,7 @@ typedef struct D3D11ComputePipeline { ID3D11ComputeShader *computeShader; + Uint32 numSamplers; Uint32 numReadonlyStorageTextures; Uint32 numWriteonlyStorageTextures; Uint32 numReadonlyStorageBuffers; @@ -609,6 +610,11 @@ typedef struct D3D11UniformBuffer Uint32 currentBlockSize; } D3D11UniformBuffer; +typedef struct D3D11Sampler +{ + ID3D11SamplerState *handle; +} D3D11Sampler; + typedef struct D3D11Renderer D3D11Renderer; typedef struct D3D11CommandBuffer @@ -646,35 +652,40 @@ typedef struct D3D11CommandBuffer bool needVertexBufferBind; bool needVertexSamplerBind; - bool needVertexResourceBind; + bool needVertexStorageTextureBind; + bool needVertexStorageBufferBind; bool needVertexUniformBufferBind; bool needFragmentSamplerBind; - bool needFragmentResourceBind; + bool needFragmentStorageTextureBind; + bool needFragmentStorageBufferBind; bool needFragmentUniformBufferBind; - bool needComputeUAVBind; - bool needComputeSRVBind; + bool needComputeSamplerBind; + bool needComputeReadOnlyTextureBind; + bool needComputeReadOnlyBufferBind; bool needComputeUniformBufferBind; ID3D11Buffer *vertexBuffers[MAX_BUFFER_BINDINGS]; Uint32 vertexBufferOffsets[MAX_BUFFER_BINDINGS]; Uint32 vertexBufferCount; - ID3D11SamplerState *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; - ID3D11ShaderResourceView *vertexShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE + - MAX_STORAGE_BUFFERS_PER_STAGE + - MAX_STORAGE_TEXTURES_PER_STAGE]; + D3D11Texture *vertexSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Sampler *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Texture *vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + D3D11Buffer *vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; - ID3D11SamplerState *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; - ID3D11ShaderResourceView *fragmentShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE + - MAX_STORAGE_BUFFERS_PER_STAGE + - MAX_STORAGE_TEXTURES_PER_STAGE]; + D3D11Texture *fragmentSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Sampler *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Texture *fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + D3D11Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; - ID3D11ShaderResourceView *computeShaderResourceViews[MAX_STORAGE_TEXTURES_PER_STAGE + - MAX_STORAGE_BUFFERS_PER_STAGE]; - ID3D11UnorderedAccessView *computeUnorderedAccessViews[MAX_COMPUTE_WRITE_TEXTURES + - MAX_COMPUTE_WRITE_BUFFERS]; + D3D11Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; + D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; + D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES]; + D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS]; // Uniform buffers D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE]; @@ -703,11 +714,6 @@ typedef struct D3D11CommandBuffer Uint32 usedUniformBufferCapacity; } D3D11CommandBuffer; -typedef struct D3D11Sampler -{ - ID3D11SamplerState *handle; -} D3D11Sampler; - struct D3D11Renderer { ID3D11Device1 *device; @@ -772,20 +778,19 @@ struct D3D11Renderer SDL_Mutex *acquireUniformBufferLock; SDL_Mutex *fenceLock; SDL_Mutex *windowLock; -}; -// Null arrays for resetting shader resource slots + // Null arrays for resetting resource slots + ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS]; -ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS]; - -ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE + + ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE]; -ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2]; -ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES + + ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS]; +}; // Logging @@ -1527,6 +1532,7 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline( pipeline = SDL_malloc(sizeof(D3D11ComputePipeline)); pipeline->computeShader = shader; + pipeline->numSamplers = createinfo->num_samplers; pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures; pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures; pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers; @@ -3235,21 +3241,31 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer( } commandBuffer->needVertexSamplerBind = true; - commandBuffer->needVertexResourceBind = true; + commandBuffer->needVertexStorageTextureBind = true; + commandBuffer->needVertexStorageBufferBind = true; commandBuffer->needVertexUniformBufferBind = true; commandBuffer->needFragmentSamplerBind = true; - commandBuffer->needFragmentResourceBind = true; + commandBuffer->needFragmentStorageTextureBind = true; + commandBuffer->needFragmentStorageBufferBind = true; commandBuffer->needFragmentUniformBufferBind = true; - commandBuffer->needComputeUAVBind = true; - commandBuffer->needComputeSRVBind = true; commandBuffer->needComputeUniformBufferBind = true; SDL_zeroa(commandBuffer->vertexSamplers); - SDL_zeroa(commandBuffer->vertexShaderResourceViews); + SDL_zeroa(commandBuffer->vertexSamplerTextures); + SDL_zeroa(commandBuffer->vertexStorageTextures); + SDL_zeroa(commandBuffer->vertexStorageBuffers); + SDL_zeroa(commandBuffer->fragmentSamplers); - SDL_zeroa(commandBuffer->fragmentShaderResourceViews); - SDL_zeroa(commandBuffer->computeShaderResourceViews); - SDL_zeroa(commandBuffer->computeUnorderedAccessViews); + SDL_zeroa(commandBuffer->fragmentSamplerTextures); + SDL_zeroa(commandBuffer->fragmentStorageTextures); + SDL_zeroa(commandBuffer->fragmentStorageBuffers); + + SDL_zeroa(commandBuffer->computeSamplers); + SDL_zeroa(commandBuffer->computeSamplerTextures); + SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures); + SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers); + SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources); + SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers); D3D11_INTERNAL_AcquireFence(commandBuffer); commandBuffer->autoReleaseFence = 1; @@ -3489,11 +3505,6 @@ static void D3D11_BeginRenderPass( SDL_GPUViewport viewport; SDL_Rect scissorRect; - d3d11CommandBuffer->needVertexSamplerBind = true; - d3d11CommandBuffer->needVertexResourceBind = true; - d3d11CommandBuffer->needFragmentSamplerBind = true; - d3d11CommandBuffer->needFragmentResourceBind = true; - // Clear the bound targets for the current command buffer for (Uint32 i = 0; i < MAX_COLOR_TARGET_BINDINGS; i += 1) { d3d11CommandBuffer->colorTargetResolveTexture[i] = NULL; @@ -3717,8 +3728,14 @@ static void D3D11_BindGraphicsPipeline( } } - // Mark that uniform bindings are needed + // Mark that bindings are needed + d3d11CommandBuffer->needVertexSamplerBind = true; + d3d11CommandBuffer->needVertexStorageTextureBind = true; + d3d11CommandBuffer->needVertexStorageBufferBind = true; d3d11CommandBuffer->needVertexUniformBufferBind = true; + d3d11CommandBuffer->needFragmentSamplerBind = true; + d3d11CommandBuffer->needFragmentStorageTextureBind = true; + d3d11CommandBuffer->needFragmentStorageBufferBind = true; d3d11CommandBuffer->needFragmentUniformBufferBind = true; } @@ -3776,14 +3793,13 @@ static void D3D11_BindVertexSamplers( textureContainer->activeTexture); d3d11CommandBuffer->vertexSamplers[firstSlot + i] = - ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle; + (D3D11Sampler *)textureSamplerBindings[i].sampler; - d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i] = - textureContainer->activeTexture->shaderView; + d3d11CommandBuffer->vertexSamplerTextures[firstSlot + i] = + textureContainer->activeTexture; } d3d11CommandBuffer->needVertexSamplerBind = true; - d3d11CommandBuffer->needVertexResourceBind = true; } static void D3D11_BindVertexStorageTextures( @@ -3801,11 +3817,11 @@ static void D3D11_BindVertexStorageTextures( d3d11CommandBuffer, textureContainer->activeTexture); - d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i + - d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount] = textureContainer->activeTexture->shaderView; + d3d11CommandBuffer->vertexStorageTextures[firstSlot + i] = + textureContainer->activeTexture; } - d3d11CommandBuffer->needVertexResourceBind = true; + d3d11CommandBuffer->needVertexStorageTextureBind = true; } static void D3D11_BindVertexStorageBuffers( @@ -3825,12 +3841,11 @@ static void D3D11_BindVertexStorageBuffers( d3d11CommandBuffer, bufferContainer->activeBuffer); - d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i + - d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount + - d3d11CommandBuffer->graphicsPipeline->vertexStorageTextureCount] = bufferContainer->activeBuffer->srv; + d3d11CommandBuffer->vertexStorageBuffers[firstSlot + i] = + bufferContainer->activeBuffer; } - d3d11CommandBuffer->needVertexResourceBind = true; + d3d11CommandBuffer->needVertexStorageBufferBind = true; } static void D3D11_BindFragmentSamplers( @@ -3849,14 +3864,13 @@ static void D3D11_BindFragmentSamplers( textureContainer->activeTexture); d3d11CommandBuffer->fragmentSamplers[firstSlot + i] = - ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle; + (D3D11Sampler *)textureSamplerBindings[i].sampler; - d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i] = - textureContainer->activeTexture->shaderView; + d3d11CommandBuffer->fragmentSamplerTextures[firstSlot + i] = + (D3D11Texture *)textureContainer->activeTexture; } d3d11CommandBuffer->needFragmentSamplerBind = true; - d3d11CommandBuffer->needFragmentResourceBind = true; } static void D3D11_BindFragmentStorageTextures( @@ -3874,11 +3888,11 @@ static void D3D11_BindFragmentStorageTextures( d3d11CommandBuffer, textureContainer->activeTexture); - d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i + - d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount] = textureContainer->activeTexture->shaderView; + d3d11CommandBuffer->fragmentStorageTextures[firstSlot + i] = + textureContainer->activeTexture; } - d3d11CommandBuffer->needFragmentResourceBind = true; + d3d11CommandBuffer->needFragmentStorageTextureBind = true; } static void D3D11_BindFragmentStorageBuffers( @@ -3898,12 +3912,11 @@ static void D3D11_BindFragmentStorageBuffers( d3d11CommandBuffer, bufferContainer->activeBuffer); - d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i + - d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount + - d3d11CommandBuffer->graphicsPipeline->fragmentStorageTextureCount] = bufferContainer->activeBuffer->srv; + d3d11CommandBuffer->fragmentStorageBuffers[firstSlot + i] = + bufferContainer->activeBuffer; } - d3d11CommandBuffer->needFragmentResourceBind = true; + d3d11CommandBuffer->needFragmentStorageBufferBind = true; } static void D3D11_INTERNAL_BindGraphicsResources( @@ -3911,18 +3924,8 @@ static void D3D11_INTERNAL_BindGraphicsResources( { D3D11GraphicsPipeline *graphicsPipeline = commandBuffer->graphicsPipeline; - Uint32 vertexResourceCount = - graphicsPipeline->vertexSamplerCount + - graphicsPipeline->vertexStorageTextureCount + - graphicsPipeline->vertexStorageBufferCount; - - Uint32 fragmentResourceCount = - graphicsPipeline->fragmentSamplerCount + - graphicsPipeline->fragmentStorageTextureCount + - graphicsPipeline->fragmentStorageBufferCount; - ID3D11Buffer *nullBuf = NULL; - Uint32 offsetInConstants, blockSizeInConstants, i; + Uint32 offsetInConstants, blockSizeInConstants; if (commandBuffer->needVertexBufferBind) { ID3D11DeviceContext_IASetVertexBuffers( @@ -3936,30 +3939,68 @@ static void D3D11_INTERNAL_BindGraphicsResources( if (commandBuffer->needVertexSamplerBind) { if (graphicsPipeline->vertexSamplerCount > 0) { + ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->vertexSamplerCount; i += 1) { + samplerStates[i] = commandBuffer->vertexSamplers[i]->handle; + srvs[i] = commandBuffer->vertexSamplerTextures[i]->shaderView; + } + ID3D11DeviceContext_VSSetSamplers( commandBuffer->context, 0, graphicsPipeline->vertexSamplerCount, - commandBuffer->vertexSamplers); + samplerStates); + + ID3D11DeviceContext_VSSetShaderResources( + commandBuffer->context, + 0, + graphicsPipeline->vertexSamplerCount, + srvs); } commandBuffer->needVertexSamplerBind = false; } - if (commandBuffer->needVertexResourceBind) { - if (vertexResourceCount > 0) { + if (commandBuffer->needVertexStorageTextureBind) { + if (graphicsPipeline->vertexStorageTextureCount > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->vertexStorageTextureCount; i += 1) { + srvs[i] = commandBuffer->vertexStorageTextures[i]->shaderView; + } + ID3D11DeviceContext_VSSetShaderResources( commandBuffer->context, - 0, - vertexResourceCount, - commandBuffer->vertexShaderResourceViews); + graphicsPipeline->vertexSamplerCount, + graphicsPipeline->vertexStorageTextureCount, + srvs); } - commandBuffer->needVertexResourceBind = false; + commandBuffer->needVertexStorageTextureBind = false; + } + + if (commandBuffer->needVertexStorageBufferBind) { + if (graphicsPipeline->vertexStorageBufferCount > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->vertexStorageBufferCount; i += 1) { + srvs[i] = commandBuffer->vertexStorageBuffers[i]->srv; + } + + ID3D11DeviceContext_VSSetShaderResources( + commandBuffer->context, + graphicsPipeline->vertexSamplerCount + graphicsPipeline->vertexStorageTextureCount, + graphicsPipeline->vertexStorageBufferCount, + srvs); + } + + commandBuffer->needVertexStorageBufferBind = false; } if (commandBuffer->needVertexUniformBufferBind) { - for (i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) { + for (Uint32 i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) { /* stupid workaround for god awful D3D11 drivers * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation */ @@ -3986,30 +4027,68 @@ static void D3D11_INTERNAL_BindGraphicsResources( if (commandBuffer->needFragmentSamplerBind) { if (graphicsPipeline->fragmentSamplerCount > 0) { + ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->fragmentSamplerCount; i += 1) { + samplerStates[i] = commandBuffer->fragmentSamplers[i]->handle; + srvs[i] = commandBuffer->fragmentSamplerTextures[i]->shaderView; + } + ID3D11DeviceContext_PSSetSamplers( commandBuffer->context, 0, graphicsPipeline->fragmentSamplerCount, - commandBuffer->fragmentSamplers); + samplerStates); + + ID3D11DeviceContext_PSSetShaderResources( + commandBuffer->context, + 0, + graphicsPipeline->fragmentSamplerCount, + srvs); } commandBuffer->needFragmentSamplerBind = false; } - if (commandBuffer->needFragmentResourceBind) { - if (fragmentResourceCount > 0) { + if (commandBuffer->needFragmentStorageTextureBind) { + if (graphicsPipeline->fragmentStorageTextureCount > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageTextureCount; i += 1) { + srvs[i] = commandBuffer->fragmentStorageTextures[i]->shaderView; + } + ID3D11DeviceContext_PSSetShaderResources( commandBuffer->context, - 0, - fragmentResourceCount, - commandBuffer->fragmentShaderResourceViews); + graphicsPipeline->fragmentSamplerCount, + graphicsPipeline->fragmentStorageTextureCount, + srvs); } - commandBuffer->needFragmentResourceBind = false; + commandBuffer->needFragmentStorageTextureBind = false; + } + + if (commandBuffer->needFragmentStorageBufferBind) { + if (graphicsPipeline->fragmentStorageBufferCount > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE]; + + for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageBufferCount; i += 1) { + srvs[i] = commandBuffer->fragmentStorageBuffers[i]->srv; + } + + ID3D11DeviceContext_PSSetShaderResources( + commandBuffer->context, + graphicsPipeline->fragmentSamplerCount + graphicsPipeline->fragmentStorageTextureCount, + graphicsPipeline->fragmentStorageBufferCount, + srvs); + } + + commandBuffer->needFragmentStorageBufferBind = false; } if (commandBuffer->needFragmentUniformBufferBind) { - for (i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) { + for (Uint32 i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) { /* stupid workaround for god awful D3D11 drivers * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-pssetconstantbuffers1#calling-pssetconstantbuffers1-with-command-list-emulation */ @@ -4127,6 +4206,7 @@ static void D3D11_EndRenderPass( SDL_GPUCommandBuffer *commandBuffer) { D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer; + D3D11Renderer *renderer = d3d11CommandBuffer->renderer; Uint32 i; // Set render target slots to NULL to avoid NULL set behavior @@ -4134,7 +4214,7 @@ static void D3D11_EndRenderPass( ID3D11DeviceContext_OMSetRenderTargets( d3d11CommandBuffer->context, MAX_COLOR_TARGET_BINDINGS, - nullRTVs, + renderer->nullRTVs, NULL); // Resolve MSAA color render targets @@ -4150,16 +4230,44 @@ static void D3D11_EndRenderPass( } } + ID3D11DeviceContext_VSSetSamplers( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE, + renderer->nullSamplers); + + ID3D11DeviceContext_VSSetShaderResources( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE, + renderer->nullSRVs); + + ID3D11DeviceContext_PSSetSamplers( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE, + renderer->nullSamplers); + + ID3D11DeviceContext_PSSetShaderResources( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE, + renderer->nullSRVs); + // Reset bind state SDL_zeroa(d3d11CommandBuffer->vertexBuffers); SDL_zeroa(d3d11CommandBuffer->vertexBufferOffsets); d3d11CommandBuffer->vertexBufferCount = 0; SDL_zeroa(d3d11CommandBuffer->vertexSamplers); - SDL_zeroa(d3d11CommandBuffer->vertexShaderResourceViews); + SDL_zeroa(d3d11CommandBuffer->vertexSamplerTextures); + SDL_zeroa(d3d11CommandBuffer->vertexStorageTextures); + SDL_zeroa(d3d11CommandBuffer->vertexStorageBuffers); SDL_zeroa(d3d11CommandBuffer->fragmentSamplers); - SDL_zeroa(d3d11CommandBuffer->fragmentShaderResourceViews); + SDL_zeroa(d3d11CommandBuffer->fragmentSamplerTextures); + SDL_zeroa(d3d11CommandBuffer->fragmentStorageTextures); + SDL_zeroa(d3d11CommandBuffer->fragmentStorageBuffers); } static void D3D11_PushVertexUniformData( @@ -4229,13 +4337,10 @@ static void D3D11_BeginComputePass( D3D11TextureSubresource *textureSubresource; D3D11BufferContainer *bufferContainer; D3D11Buffer *buffer; - Uint32 i; + ID3D11UnorderedAccessView *uavs[MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS]; - for (i = 0; i < numStorageTextureBindings; i += 1) { + for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) { textureContainer = (D3D11TextureContainer *)storageTextureBindings[i].texture; - if (!(textureContainer->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) { - SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture"); - } textureSubresource = D3D11_INTERNAL_PrepareTextureSubresourceForWrite( d3d11CommandBuffer->renderer, @@ -4248,10 +4353,10 @@ static void D3D11_BeginComputePass( d3d11CommandBuffer, textureSubresource->parent); - d3d11CommandBuffer->computeUnorderedAccessViews[i] = textureSubresource->uav; + d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource; } - for (i = 0; i < numStorageBufferBindings; i += 1) { + for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) { bufferContainer = (D3D11BufferContainer *)storageBufferBindings[i].buffer; buffer = D3D11_INTERNAL_PrepareBufferForWrite( @@ -4263,10 +4368,23 @@ static void D3D11_BeginComputePass( d3d11CommandBuffer, buffer); - d3d11CommandBuffer->computeUnorderedAccessViews[i + numStorageTextureBindings] = buffer->uav; + d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer; } - d3d11CommandBuffer->needComputeUAVBind = true; + for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) { + uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav; + } + + for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) { + uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav; + } + + ID3D11DeviceContext_CSSetUnorderedAccessViews( + d3d11CommandBuffer->context, + 0, + numStorageTextureBindings + numStorageBufferBindings, + uavs, + NULL); } static void D3D11_BindComputePipeline( @@ -4292,9 +4410,37 @@ static void D3D11_BindComputePipeline( } } + d3d11CommandBuffer->needComputeSamplerBind = true; + d3d11CommandBuffer->needComputeReadOnlyTextureBind = true; + d3d11CommandBuffer->needComputeReadOnlyBufferBind = true; d3d11CommandBuffer->needComputeUniformBufferBind = true; } +static void D3D11_BindComputeSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + D3D11TextureContainer *textureContainer = (D3D11TextureContainer *)textureSamplerBindings[i].texture; + + D3D11_INTERNAL_TrackTexture( + d3d11CommandBuffer, + textureContainer->activeTexture); + + d3d11CommandBuffer->computeSamplers[firstSlot + i] = + (D3D11Sampler *)textureSamplerBindings[i].sampler; + + d3d11CommandBuffer->computeSamplerTextures[firstSlot + i] = + textureContainer->activeTexture; + } + + d3d11CommandBuffer->needComputeSamplerBind = true; +} + static void D3D11_BindComputeStorageTextures( SDL_GPUCommandBuffer *commandBuffer, Uint32 firstSlot, @@ -4310,11 +4456,11 @@ static void D3D11_BindComputeStorageTextures( d3d11CommandBuffer, textureContainer->activeTexture); - d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i] = - textureContainer->activeTexture->shaderView; + d3d11CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] = + textureContainer->activeTexture; } - d3d11CommandBuffer->needComputeSRVBind = true; + d3d11CommandBuffer->needComputeReadOnlyTextureBind = true; } static void D3D11_BindComputeStorageBuffers( @@ -4334,11 +4480,11 @@ static void D3D11_BindComputeStorageBuffers( d3d11CommandBuffer, bufferContainer->activeBuffer); - d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i + - d3d11CommandBuffer->computePipeline->numReadonlyStorageTextures] = bufferContainer->activeBuffer->srv; + d3d11CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] = + bufferContainer->activeBuffer; } - d3d11CommandBuffer->needComputeSRVBind = true; + d3d11CommandBuffer->needComputeReadOnlyBufferBind = true; } static void D3D11_PushComputeUniformData( @@ -4360,40 +4506,73 @@ static void D3D11_INTERNAL_BindComputeResources( { D3D11ComputePipeline *computePipeline = commandBuffer->computePipeline; - Uint32 readOnlyResourceCount = - computePipeline->numReadonlyStorageTextures + - computePipeline->numReadonlyStorageBuffers; - - Uint32 writeOnlyResourceCount = - computePipeline->numWriteonlyStorageTextures + - computePipeline->numWriteonlyStorageBuffers; - ID3D11Buffer *nullBuf = NULL; - Uint32 offsetInConstants, blockSizeInConstants, i; + Uint32 offsetInConstants, blockSizeInConstants; - if (commandBuffer->needComputeUAVBind) { - ID3D11DeviceContext_CSSetUnorderedAccessViews( - commandBuffer->context, - 0, - writeOnlyResourceCount, - commandBuffer->computeUnorderedAccessViews, - NULL); + if (commandBuffer->needComputeSamplerBind) { + if (computePipeline->numSamplers > 0) { + ID3D11SamplerState *samplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE]; - commandBuffer->needComputeUAVBind = false; + for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) { + samplers[i] = commandBuffer->computeSamplers[i]->handle; + srvs[i] = commandBuffer->computeSamplerTextures[i]->shaderView; + } + + ID3D11DeviceContext_CSSetSamplers( + commandBuffer->context, + 0, + computePipeline->numSamplers, + samplers); + + ID3D11DeviceContext_CSSetShaderResources( + commandBuffer->context, + 0, + computePipeline->numSamplers, + srvs); + } + + commandBuffer->needComputeSamplerBind = false; } - if (commandBuffer->needComputeSRVBind) { - ID3D11DeviceContext_CSSetShaderResources( - commandBuffer->context, - 0, - readOnlyResourceCount, - commandBuffer->computeShaderResourceViews); + if (commandBuffer->needComputeReadOnlyTextureBind) { + if (computePipeline->numReadonlyStorageTextures > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE]; - commandBuffer->needComputeSRVBind = false; + for (Uint32 i = 0; i < computePipeline->numReadonlyStorageTextures; i += 1) { + srvs[i] = commandBuffer->computeReadOnlyStorageTextures[i]->shaderView; + } + + ID3D11DeviceContext_CSSetShaderResources( + commandBuffer->context, + computePipeline->numSamplers, + computePipeline->numReadonlyStorageTextures, + srvs); + } + + commandBuffer->needComputeReadOnlyTextureBind = false; + } + + if (commandBuffer->needComputeReadOnlyBufferBind) { + if (computePipeline->numReadonlyStorageBuffers > 0) { + ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE]; + + for (Uint32 i = 0; i < computePipeline->numReadonlyStorageBuffers; i += 1) { + srvs[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srv; + } + + ID3D11DeviceContext_CSSetShaderResources( + commandBuffer->context, + computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures, + computePipeline->numReadonlyStorageBuffers, + srvs); + } + + commandBuffer->needComputeReadOnlyBufferBind = false; } if (commandBuffer->needComputeUniformBufferBind) { - for (i = 0; i < computePipeline->numUniformBuffers; i += 1) { + for (Uint32 i = 0; i < computePipeline->numUniformBuffers; i += 1) { /* stupid workaround for god awful D3D11 drivers * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation */ @@ -4456,6 +4635,7 @@ static void D3D11_EndComputePass( SDL_GPUCommandBuffer *commandBuffer) { D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer; + D3D11Renderer *renderer = d3d11CommandBuffer->renderer; // reset UAV slots to avoid NULL set behavior // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-cssetshaderresources @@ -4463,14 +4643,30 @@ static void D3D11_EndComputePass( d3d11CommandBuffer->context, 0, MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS, - nullUAVs, + renderer->nullUAVs, NULL); + ID3D11DeviceContext_CSSetSamplers( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE, + renderer->nullSamplers); + + ID3D11DeviceContext_CSSetShaderResources( + d3d11CommandBuffer->context, + 0, + MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE, + renderer->nullSRVs); + d3d11CommandBuffer->computePipeline = NULL; // Reset bind state - SDL_zeroa(d3d11CommandBuffer->computeUnorderedAccessViews); - SDL_zeroa(d3d11CommandBuffer->computeShaderResourceViews); + SDL_zeroa(d3d11CommandBuffer->computeSamplers); + SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures); + SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures); + SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers); + SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources); + SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers); } // Fence Cleanup @@ -6185,10 +6381,10 @@ tryCreateDevice: // Initialize null states - SDL_zeroa(nullRTVs); - SDL_zeroa(nullSRVs); - SDL_zeroa(nullSamplers); - SDL_zeroa(nullUAVs); + SDL_zeroa(renderer->nullRTVs); + SDL_zeroa(renderer->nullSRVs); + SDL_zeroa(renderer->nullSamplers); + SDL_zeroa(renderer->nullUAVs); // Initialize built-in pipelines D3D11_INTERNAL_InitBlitPipelines(renderer); diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c index 87638011fa..df8e74a8d7 100644 --- a/src/gpu/d3d12/SDL_gpu_d3d12.c +++ b/src/gpu/d3d12/SDL_gpu_d3d12.c @@ -694,6 +694,7 @@ struct D3D12CommandBuffer bool needFragmentStorageBufferBind; bool needFragmentUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE]; + bool needComputeSamplerBind; bool needComputeReadOnlyStorageTextureBind; bool needComputeReadOnlyStorageBufferBind; bool needComputeUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE]; @@ -714,6 +715,8 @@ struct D3D12CommandBuffer D3D12Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; D3D12UniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE]; + D3D12Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES]; @@ -805,6 +808,8 @@ typedef struct D3D12ComputeRootSignature { ID3D12RootSignature *handle; + Sint32 samplerRootIndex; + Sint32 samplerTextureRootIndex; Sint32 readOnlyStorageTextureRootIndex; Sint32 readOnlyStorageBufferRootIndex; Sint32 writeOnlyStorageTextureRootIndex; @@ -817,10 +822,11 @@ struct D3D12ComputePipeline ID3D12PipelineState *pipelineState; D3D12ComputeRootSignature *rootSignature; - Uint32 num_readonly_storage_textures; - Uint32 num_readonly_storage_buffers; - Uint32 num_writeonly_storage_textures; - Uint32 num_writeonly_storage_buffers; + Uint32 numSamplers; + Uint32 numReadOnlyStorageTextures; + Uint32 numReadOnlyStorageBuffers; + Uint32 numWriteOnlyStorageTextures; + Uint32 numWriteOnlyStorageBuffers; Uint32 numUniformBuffers; SDL_AtomicInt referenceCount; @@ -2145,6 +2151,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature( SDL_zeroa(descriptorRanges); SDL_zero(rootParameter); + d3d12ComputeRootSignature->samplerRootIndex = -1; + d3d12ComputeRootSignature->samplerTextureRootIndex = -1; d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1; d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1; d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1; @@ -2154,10 +2162,44 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature( d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1; } + if (createInfo->num_samplers) { + descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + descriptorRange.NumDescriptors = createInfo->num_samplers; + descriptorRange.BaseShaderRegister = 0; + descriptorRange.RegisterSpace = 0; + descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges[rangeCount] = descriptorRange; + + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameter.DescriptorTable.NumDescriptorRanges = 1; + rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount]; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute + rootParameters[parameterCount] = rootParameter; + d3d12ComputeRootSignature->samplerRootIndex = parameterCount; + rangeCount += 1; + parameterCount += 1; + + descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descriptorRange.NumDescriptors = createInfo->num_samplers; + descriptorRange.BaseShaderRegister = 0; + descriptorRange.RegisterSpace = 0; + descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges[rangeCount] = descriptorRange; + + rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameter.DescriptorTable.NumDescriptorRanges = 1; + rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount]; + rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute + rootParameters[parameterCount] = rootParameter; + d3d12ComputeRootSignature->samplerTextureRootIndex = parameterCount; + rangeCount += 1; + parameterCount += 1; + } + if (createInfo->num_readonly_storage_textures) { descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; descriptorRange.NumDescriptors = createInfo->num_readonly_storage_textures; - descriptorRange.BaseShaderRegister = 0; + descriptorRange.BaseShaderRegister = createInfo->num_samplers; descriptorRange.RegisterSpace = 0; descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; descriptorRanges[rangeCount] = descriptorRange; @@ -2175,7 +2217,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature( if (createInfo->num_readonly_storage_buffers) { descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; descriptorRange.NumDescriptors = createInfo->num_readonly_storage_buffers; - descriptorRange.BaseShaderRegister = createInfo->num_readonly_storage_textures; + descriptorRange.BaseShaderRegister = createInfo->num_samplers + createInfo->num_readonly_storage_textures; descriptorRange.RegisterSpace = 0; descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; descriptorRanges[rangeCount] = descriptorRange; @@ -2346,10 +2388,11 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline( computePipeline->pipelineState = pipelineState; computePipeline->rootSignature = rootSignature; - computePipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures; - computePipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers; - computePipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures; - computePipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers; + computePipeline->numSamplers = createinfo->num_samplers; + computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures; + computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers; + computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures; + computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers; computePipeline->numUniformBuffers = createinfo->num_uniform_buffers; SDL_AtomicSet(&computePipeline->referenceCount, 0); @@ -4889,6 +4932,7 @@ static void D3D12_BindComputePipeline( d3d12CommandBuffer->currentComputePipeline = pipeline; + d3d12CommandBuffer->needComputeSamplerBind = true; d3d12CommandBuffer->needComputeReadOnlyStorageTextureBind = true; d3d12CommandBuffer->needComputeReadOnlyStorageBufferBind = true; @@ -4906,8 +4950,8 @@ static void D3D12_BindComputePipeline( D3D12_INTERNAL_TrackComputePipeline(d3d12CommandBuffer, pipeline); // Bind write-only resources after setting root signature - if (pipeline->num_writeonly_storage_textures > 0) { - for (Uint32 i = 0; i < pipeline->num_writeonly_storage_textures; i += 1) { + if (pipeline->numWriteOnlyStorageTextures > 0) { + for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageTextures; i += 1) { cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uavHandle.cpuHandle; } @@ -4924,8 +4968,8 @@ static void D3D12_BindComputePipeline( gpuDescriptorHandle); } - if (pipeline->num_writeonly_storage_buffers > 0) { - for (Uint32 i = 0; i < pipeline->num_writeonly_storage_buffers; i += 1) { + if (pipeline->numWriteOnlyStorageBuffers > 0) { + for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageBuffers; i += 1) { cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]->uavDescriptor.cpuHandle; } @@ -4943,6 +4987,32 @@ static void D3D12_BindComputePipeline( } } +static void D3D12_BindComputeSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + D3D12TextureContainer *container = (D3D12TextureContainer *)textureSamplerBindings[i].texture; + + D3D12_INTERNAL_TrackSampler( + d3d12CommandBuffer, + (D3D12Sampler *)textureSamplerBindings[i].sampler); + + D3D12_INTERNAL_TrackTexture( + d3d12CommandBuffer, + container->activeTexture); + + d3d12CommandBuffer->computeSamplerTextures[firstSlot + i] = container->activeTexture; + d3d12CommandBuffer->computeSamplers[firstSlot + i] = (D3D12Sampler *)textureSamplerBindings[i].sampler; + } + + d3d12CommandBuffer->needComputeSamplerBind = true; +} + static void D3D12_BindComputeStorageTextures( SDL_GPUCommandBuffer *commandBuffer, Uint32 firstSlot, @@ -5033,9 +5103,46 @@ static void D3D12_INTERNAL_BindComputeResources( D3D12_CPU_DESCRIPTOR_HANDLE cpuHandles[MAX_TEXTURE_SAMPLERS_PER_STAGE]; D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptorHandle; + if (commandBuffer->needComputeSamplerBind) { + if (computePipeline->numSamplers > 0) { + for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) { + cpuHandles[i] = commandBuffer->computeSamplers[i]->handle.cpuHandle; + } + + D3D12_INTERNAL_WriteGPUDescriptors( + commandBuffer, + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + cpuHandles, + computePipeline->numSamplers, + &gpuDescriptorHandle); + + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable( + commandBuffer->graphicsCommandList, + computePipeline->rootSignature->samplerRootIndex, + gpuDescriptorHandle); + + for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) { + cpuHandles[i] = commandBuffer->computeSamplerTextures[i]->srvHandle.cpuHandle; + } + + D3D12_INTERNAL_WriteGPUDescriptors( + commandBuffer, + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + cpuHandles, + computePipeline->numSamplers, + &gpuDescriptorHandle); + + ID3D12GraphicsCommandList_SetComputeRootDescriptorTable( + commandBuffer->graphicsCommandList, + computePipeline->rootSignature->samplerTextureRootIndex, + gpuDescriptorHandle); + } + commandBuffer->needComputeSamplerBind = false; + } + if (commandBuffer->needComputeReadOnlyStorageTextureBind) { - if (computePipeline->num_readonly_storage_textures > 0) { - for (Uint32 i = 0; i < computePipeline->num_readonly_storage_textures; i += 1) { + if (computePipeline->numReadOnlyStorageTextures > 0) { + for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageTextures; i += 1) { cpuHandles[i] = commandBuffer->computeReadOnlyStorageTextures[i]->srvHandle.cpuHandle; } @@ -5043,7 +5150,7 @@ static void D3D12_INTERNAL_BindComputeResources( commandBuffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, cpuHandles, - computePipeline->num_readonly_storage_textures, + computePipeline->numReadOnlyStorageTextures, &gpuDescriptorHandle); ID3D12GraphicsCommandList_SetComputeRootDescriptorTable( @@ -5055,8 +5162,8 @@ static void D3D12_INTERNAL_BindComputeResources( } if (commandBuffer->needComputeReadOnlyStorageBufferBind) { - if (computePipeline->num_readonly_storage_buffers > 0) { - for (Uint32 i = 0; i < computePipeline->num_readonly_storage_buffers; i += 1) { + if (computePipeline->numReadOnlyStorageBuffers > 0) { + for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageBuffers; i += 1) { cpuHandles[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srvDescriptor.cpuHandle; } @@ -5064,7 +5171,7 @@ static void D3D12_INTERNAL_BindComputeResources( commandBuffer, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, cpuHandles, - computePipeline->num_readonly_storage_buffers, + computePipeline->numReadOnlyStorageBuffers, &gpuDescriptorHandle); ID3D12GraphicsCommandList_SetComputeRootDescriptorTable( @@ -5174,6 +5281,9 @@ static void D3D12_EndComputePass( } } + SDL_zeroa(d3d12CommandBuffer->computeSamplerTextures); + SDL_zeroa(d3d12CommandBuffer->computeSamplers); + d3d12CommandBuffer->currentComputePipeline = NULL; } @@ -6751,6 +6861,8 @@ static SDL_GPUCommandBuffer *D3D12_AcquireCommandBuffer( SDL_zeroa(commandBuffer->fragmentStorageBuffers); SDL_zeroa(commandBuffer->fragmentUniformBuffers); + SDL_zeroa(commandBuffer->computeSamplerTextures); + SDL_zeroa(commandBuffer->computeSamplers); SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures); SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers); SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources); diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index a71a0f007b..149a0c05be 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -403,7 +403,7 @@ typedef struct MetalShader id library; id function; - Uint32 num_samplers; + Uint32 numSamplers; Uint32 numUniformBuffers; Uint32 numStorageBuffers; Uint32 numStorageTextures; @@ -434,14 +434,15 @@ typedef struct MetalGraphicsPipeline typedef struct MetalComputePipeline { id handle; - Uint32 num_readonly_storage_textures; - Uint32 num_writeonly_storage_textures; - Uint32 num_readonly_storage_buffers; - Uint32 num_writeonly_storage_buffers; + Uint32 numSamplers; + Uint32 numReadonlyStorageTextures; + Uint32 numWriteonlyStorageTextures; + Uint32 numReadonlyStorageBuffers; + Uint32 numWriteonlyStorageBuffers; Uint32 numUniformBuffers; - Uint32 threadcount_x; - Uint32 threadcount_y; - Uint32 threadcount_z; + Uint32 threadcountX; + Uint32 threadcountY; + Uint32 threadcountZ; } MetalComputePipeline; typedef struct MetalBuffer @@ -511,6 +512,7 @@ typedef struct MetalCommandBuffer bool needFragmentStorageBufferBind; bool needFragmentUniformBind; + bool needComputeSamplerBind; bool needComputeTextureBind; bool needComputeBufferBind; bool needComputeUniformBind; @@ -525,6 +527,8 @@ typedef struct MetalCommandBuffer id fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; id fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; + id computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + id computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; id computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; id computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; id computeWriteOnlyTextures[MAX_COMPUTE_WRITE_TEXTURES]; @@ -984,14 +988,15 @@ static SDL_GPUComputePipeline *METAL_CreateComputePipeline( pipeline = SDL_calloc(1, sizeof(MetalComputePipeline)); pipeline->handle = handle; - pipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures; - pipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures; - pipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers; - pipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers; + pipeline->numSamplers = createinfo->num_samplers; + pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures; + pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures; + pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers; + pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers; pipeline->numUniformBuffers = createinfo->num_uniform_buffers; - pipeline->threadcount_x = createinfo->threadcount_x; - pipeline->threadcount_y = createinfo->threadcount_y; - pipeline->threadcount_z = createinfo->threadcount_z; + pipeline->threadcountX = createinfo->threadcount_x; + pipeline->threadcountY = createinfo->threadcount_y; + pipeline->threadcountZ = createinfo->threadcount_z; return (SDL_GPUComputePipeline *)pipeline; } @@ -1123,11 +1128,11 @@ static SDL_GPUGraphicsPipeline *METAL_CreateGraphicsPipeline( result->depth_stencil_state = depthStencilState; result->rasterizerState = createinfo->rasterizer_state; result->primitiveType = createinfo->primitive_type; - result->vertexSamplerCount = vertexShader->num_samplers; + result->vertexSamplerCount = vertexShader->numSamplers; result->vertexUniformBufferCount = vertexShader->numUniformBuffers; result->vertexStorageBufferCount = vertexShader->numStorageBuffers; result->vertexStorageTextureCount = vertexShader->numStorageTextures; - result->fragmentSamplerCount = fragmentShader->num_samplers; + result->fragmentSamplerCount = fragmentShader->numSamplers; result->fragmentUniformBufferCount = fragmentShader->numUniformBuffers; result->fragmentStorageBufferCount = fragmentShader->numStorageBuffers; result->fragmentStorageTextureCount = fragmentShader->numStorageTextures; @@ -1309,7 +1314,7 @@ static SDL_GPUShader *METAL_CreateShader( result = SDL_calloc(1, sizeof(MetalShader)); result->library = libraryFunction.library; result->function = libraryFunction.function; - result->num_samplers = createinfo->num_samplers; + result->numSamplers = createinfo->num_samplers; result->numStorageBuffers = createinfo->num_storage_buffers; result->numStorageTextures = createinfo->num_storage_textures; result->numUniformBuffers = createinfo->num_uniform_buffers; @@ -2042,6 +2047,7 @@ static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer( commandBuffer->needFragmentStorageTextureBind = true; commandBuffer->needFragmentStorageBufferBind = true; commandBuffer->needFragmentUniformBind = true; + commandBuffer->needComputeSamplerBind = true; commandBuffer->needComputeBufferBind = true; commandBuffer->needComputeTextureBind = true; commandBuffer->needComputeUniformBind = true; @@ -2627,41 +2633,54 @@ static void METAL_INTERNAL_BindComputeResources( MetalCommandBuffer *commandBuffer) { MetalComputePipeline *computePipeline = commandBuffer->compute_pipeline; - NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; // 8 is the max for both read and write-only + NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; + + if (commandBuffer->needComputeSamplerBind) { + // Bind sampler textures + if (computePipeline->numSamplers > 0) { + [commandBuffer->computeEncoder setTextures:commandBuffer->computeSamplerTextures + withRange:NSMakeRange(0, computePipeline->numSamplers)]; + [commandBuffer->computeEncoder setSamplerStates:commandBuffer->computeSamplers + withRange:NSMakeRange(0, computePipeline->numSamplers)]; + } + commandBuffer->needComputeSamplerBind = false; + } if (commandBuffer->needComputeTextureBind) { // Bind read-only textures - if (computePipeline->num_readonly_storage_textures > 0) { + if (computePipeline->numReadonlyStorageTextures > 0) { [commandBuffer->computeEncoder setTextures:commandBuffer->computeReadOnlyTextures - withRange:NSMakeRange(0, computePipeline->num_readonly_storage_textures)]; + withRange:NSMakeRange( + computePipeline->numSamplers, + computePipeline->numReadonlyStorageTextures)]; } // Bind write-only textures - if (computePipeline->num_writeonly_storage_textures > 0) { + if (computePipeline->numWriteonlyStorageTextures > 0) { [commandBuffer->computeEncoder setTextures:commandBuffer->computeWriteOnlyTextures withRange:NSMakeRange( - computePipeline->num_readonly_storage_textures, - computePipeline->num_writeonly_storage_textures)]; + computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures, + computePipeline->numWriteonlyStorageTextures)]; } commandBuffer->needComputeTextureBind = false; } if (commandBuffer->needComputeBufferBind) { // Bind read-only buffers - if (computePipeline->num_readonly_storage_buffers > 0) { + if (computePipeline->numReadonlyStorageBuffers > 0) { [commandBuffer->computeEncoder setBuffers:commandBuffer->computeReadOnlyBuffers offsets:offsets withRange:NSMakeRange(computePipeline->numUniformBuffers, - computePipeline->num_readonly_storage_buffers)]; + computePipeline->numReadonlyStorageBuffers)]; } // Bind write-only buffers - if (computePipeline->num_writeonly_storage_buffers > 0) { + if (computePipeline->numWriteonlyStorageBuffers > 0) { [commandBuffer->computeEncoder setBuffers:commandBuffer->computeWriteOnlyBuffers offsets:offsets withRange:NSMakeRange( computePipeline->numUniformBuffers + - computePipeline->num_readonly_storage_buffers, - computePipeline->num_writeonly_storage_buffers)]; + computePipeline->numReadonlyStorageBuffers, + computePipeline->numWriteonlyStorageBuffers)]; } commandBuffer->needComputeBufferBind = false; } @@ -3020,6 +3039,32 @@ static void METAL_BindComputePipeline( } } +static void METAL_BindComputeSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalTextureContainer *textureContainer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + textureContainer = (MetalTextureContainer *)textureSamplerBindings[i].texture; + + METAL_INTERNAL_TrackTexture( + metalCommandBuffer, + textureContainer->activeTexture); + + metalCommandBuffer->computeSamplers[firstSlot + i] = + ((MetalSampler *)textureSamplerBindings[i].sampler)->handle; + + metalCommandBuffer->computeSamplerTextures[firstSlot + i] = + textureContainer->activeTexture->handle; + } + + metalCommandBuffer->needComputeSamplerBind = true; +} + static void METAL_BindComputeStorageTextures( SDL_GPUCommandBuffer *commandBuffer, Uint32 firstSlot, @@ -3092,9 +3137,9 @@ static void METAL_DispatchCompute( MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; MTLSize threadgroups = MTLSizeMake(groupcountX, groupcountY, groupcountZ); MTLSize threadsPerThreadgroup = MTLSizeMake( - metalCommandBuffer->compute_pipeline->threadcount_x, - metalCommandBuffer->compute_pipeline->threadcount_y, - metalCommandBuffer->compute_pipeline->threadcount_z); + metalCommandBuffer->compute_pipeline->threadcountX, + metalCommandBuffer->compute_pipeline->threadcountY, + metalCommandBuffer->compute_pipeline->threadcountZ); METAL_INTERNAL_BindComputeResources(metalCommandBuffer); @@ -3113,9 +3158,9 @@ static void METAL_DispatchComputeIndirect( MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer; MTLSize threadsPerThreadgroup = MTLSizeMake( - metalCommandBuffer->compute_pipeline->threadcount_x, - metalCommandBuffer->compute_pipeline->threadcount_y, - metalCommandBuffer->compute_pipeline->threadcount_z); + metalCommandBuffer->compute_pipeline->threadcountX, + metalCommandBuffer->compute_pipeline->threadcountY, + metalCommandBuffer->compute_pipeline->threadcountZ); METAL_INTERNAL_BindComputeResources(metalCommandBuffer); @@ -3136,6 +3181,10 @@ static void METAL_EndComputePass( [metalCommandBuffer->computeEncoder endEncoding]; metalCommandBuffer->computeEncoder = nil; + for (Uint32 i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) { + metalCommandBuffer->computeSamplers[i] = nil; + metalCommandBuffer->computeSamplerTextures[i] = nil; + } for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) { metalCommandBuffer->computeWriteOnlyTextures[i] = nil; } @@ -3223,6 +3272,8 @@ static void METAL_INTERNAL_CleanCommandBuffer( commandBuffer->vertexTextures[i] = nil; commandBuffer->fragmentSamplers[i] = nil; commandBuffer->fragmentTextures[i] = nil; + commandBuffer->computeSamplers[i] = nil; + commandBuffer->computeSamplerTextures[i] = nil; } for (i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) { commandBuffer->vertexStorageTextures[i] = nil; diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index 976287ce4a..9138ff5aa2 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -891,12 +891,13 @@ typedef struct VulkanComputePipelineResourceLayout /* * Descriptor set layout is as follows: - * 0: read-only textures, then read-only buffers + * 0: samplers, then read-only textures, then read-only buffers * 1: write-only textures, then write-only buffers * 2: uniform buffers */ DescriptorSetPool descriptorSetPools[3]; + Uint32 numSamplers; Uint32 numReadonlyStorageTextures; Uint32 numReadonlyStorageBuffers; Uint32 numWriteonlyStorageTextures; @@ -1055,6 +1056,8 @@ typedef struct VulkanCommandBuffer Uint32 writeOnlyComputeStorageTextureSubresourceCount; VulkanBuffer *writeOnlyComputeStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS]; + VulkanTexture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE]; + VulkanSampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE]; VulkanTexture *readOnlyComputeStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE]; VulkanBuffer *readOnlyComputeStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE]; @@ -3870,6 +3873,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout( VkResult vulkanResult; Uint32 i; + pipelineResourceLayout->numSamplers = createinfo->num_samplers; pipelineResourceLayout->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures; pipelineResourceLayout->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers; pipelineResourceLayout->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures; @@ -3883,6 +3887,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout( descriptorSetLayoutCreateInfo.flags = 0; descriptorSetLayoutCreateInfo.pBindings = NULL; descriptorSetLayoutCreateInfo.bindingCount = + createinfo->num_samplers + createinfo->num_readonly_storage_textures + createinfo->num_readonly_storage_buffers; @@ -3895,7 +3900,18 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout( descriptorSetPool->descriptorInfos = SDL_malloc( descriptorSetPool->descriptorInfoCount * sizeof(VulkanDescriptorInfo)); - for (i = 0; i < createinfo->num_readonly_storage_textures; i += 1) { + for (i = 0; i < createinfo->num_samplers; i += 1) { + descriptorSetLayoutBindings[i].binding = i; + descriptorSetLayoutBindings[i].descriptorCount = 1; + descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptorSetLayoutBindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + descriptorSetLayoutBindings[i].pImmutableSamplers = NULL; + + descriptorSetPool->descriptorInfos[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT; + } + + for (i = createinfo->num_samplers; i < createinfo->num_samplers + createinfo->num_readonly_storage_textures; i += 1) { descriptorSetLayoutBindings[i].binding = i; descriptorSetLayoutBindings[i].descriptorCount = 1; descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; @@ -3906,7 +3922,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout( descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT; } - for (i = createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) { + for (i = createinfo->num_samplers + createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) { descriptorSetLayoutBindings[i].binding = i; descriptorSetLayoutBindings[i].descriptorCount = 1; descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -8368,6 +8384,31 @@ static void VULKAN_BindComputePipeline( vulkanCommandBuffer->needNewComputeUniformOffsets = true; } +static void VULKAN_BindComputeSamplers( + SDL_GPUCommandBuffer *commandBuffer, + Uint32 firstSlot, + const SDL_GPUTextureSamplerBinding *textureSamplerBindings, + Uint32 numBindings) +{ + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + + for (Uint32 i = 0; i < numBindings; i += 1) { + VulkanTextureContainer *textureContainer = (VulkanTextureContainer *)textureSamplerBindings[i].texture; + vulkanCommandBuffer->computeSamplerTextures[firstSlot + i] = textureContainer->activeTextureHandle->vulkanTexture; + vulkanCommandBuffer->computeSamplers[firstSlot + i] = (VulkanSampler *)textureSamplerBindings[i].sampler; + + VULKAN_INTERNAL_TrackSampler( + vulkanCommandBuffer, + (VulkanSampler *)textureSamplerBindings[i].sampler); + + VULKAN_INTERNAL_TrackTexture( + vulkanCommandBuffer, + textureContainer->activeTextureHandle->vulkanTexture); + } + + vulkanCommandBuffer->needNewComputeReadOnlyDescriptorSet = true; +} + static void VULKAN_BindComputeStorageTextures( SDL_GPUCommandBuffer *commandBuffer, Uint32 firstSlot, @@ -8468,7 +8509,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets( VkWriteDescriptorSet *currentWriteDescriptorSet; DescriptorSetPool *descriptorSetPool; VkDescriptorBufferInfo bufferInfos[MAX_STORAGE_BUFFERS_PER_STAGE]; // 8 is max for both read and write - VkDescriptorImageInfo imageInfos[MAX_STORAGE_TEXTURES_PER_STAGE]; // 8 is max for both read and write + VkDescriptorImageInfo imageInfos[MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE]; Uint32 dynamicOffsets[MAX_UNIFORM_BUFFERS_PER_STAGE]; Uint32 bufferInfoCount = 0; Uint32 imageInfoCount = 0; @@ -8486,9 +8527,31 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets( writeDescriptorSets = SDL_stack_alloc( VkWriteDescriptorSet, - resourceLayout->numReadonlyStorageTextures + + resourceLayout->numSamplers + + resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers); + for (i = 0; i < resourceLayout->numSamplers; i += 1) { + currentWriteDescriptorSet = &writeDescriptorSets[i]; + currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + currentWriteDescriptorSet->pNext = NULL; + currentWriteDescriptorSet->descriptorCount = 1; + currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + currentWriteDescriptorSet->dstArrayElement = 0; + currentWriteDescriptorSet->dstBinding = i; + currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet; + currentWriteDescriptorSet->pTexelBufferView = NULL; + currentWriteDescriptorSet->pBufferInfo = NULL; + + imageInfos[imageInfoCount].sampler = commandBuffer->computeSamplers[i]->sampler; + imageInfos[imageInfoCount].imageView = commandBuffer->computeSamplerTextures[i]->fullView; + imageInfos[imageInfoCount].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + currentWriteDescriptorSet->pImageInfo = &imageInfos[imageInfoCount]; + + imageInfoCount += 1; + } + for (i = 0; i < resourceLayout->numReadonlyStorageTextures; i += 1) { currentWriteDescriptorSet = &writeDescriptorSets[i]; currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -8496,7 +8559,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets( currentWriteDescriptorSet->descriptorCount = 1; currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; currentWriteDescriptorSet->dstArrayElement = 0; - currentWriteDescriptorSet->dstBinding = i; + currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + i; currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet; currentWriteDescriptorSet->pTexelBufferView = NULL; currentWriteDescriptorSet->pBufferInfo = NULL; @@ -8518,7 +8581,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets( currentWriteDescriptorSet->descriptorCount = 1; currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; currentWriteDescriptorSet->dstArrayElement = 0; - currentWriteDescriptorSet->dstBinding = resourceLayout->numReadonlyStorageTextures + i; + currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + i; currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet; currentWriteDescriptorSet->pTexelBufferView = NULL; currentWriteDescriptorSet->pImageInfo = NULL; @@ -8534,7 +8597,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets( renderer->vkUpdateDescriptorSets( renderer->logicalDevice, - resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers, + resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers, writeDescriptorSets, 0, NULL); @@ -8794,6 +8857,10 @@ static void VULKAN_EndComputePass( } } + // we don't need a barrier because sampler state is always the default if sampler bit is set + SDL_zeroa(vulkanCommandBuffer->computeSamplerTextures); + SDL_zeroa(vulkanCommandBuffer->computeSamplers); + vulkanCommandBuffer->currentComputePipeline = NULL; vulkanCommandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE; @@ -9676,6 +9743,8 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer( SDL_zeroa(commandBuffer->writeOnlyComputeStorageTextureSubresources); commandBuffer->writeOnlyComputeStorageTextureSubresourceCount = 0; SDL_zeroa(commandBuffer->writeOnlyComputeStorageBuffers); + SDL_zeroa(commandBuffer->computeSamplerTextures); + SDL_zeroa(commandBuffer->computeSamplers); SDL_zeroa(commandBuffer->readOnlyComputeStorageTextures); SDL_zeroa(commandBuffer->readOnlyComputeStorageBuffers);