Add SDL_BindGPUComputeSamplers (#10778)

---------

Co-authored-by: Caleb Cornett <caleb.cornett@outlook.com>
This commit is contained in:
Evan Hemsley 2024-09-10 19:20:14 -07:00 committed by GitHub
parent 323b60abf1
commit 2b8a349b26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 692 additions and 209 deletions

View file

@ -1491,6 +1491,7 @@ typedef struct SDL_GPUComputePipelineCreateInfo
const Uint8 *code; /**< A pointer to compute shader code. */
const char *entrypoint; /**< A pointer to a null-terminated UTF-8 string specifying the entry point function name for the shader. */
SDL_GPUShaderFormat format; /**< The format of the compute shader code. */
Uint32 num_samplers; /**< The number of samplers defined in the shader. */
Uint32 num_readonly_storage_textures; /**< The number of readonly storage textures defined in the shader. */
Uint32 num_readonly_storage_buffers; /**< The number of readonly storage buffers defined in the shader. */
Uint32 num_writeonly_storage_textures; /**< The number of writeonly storage textures defined in the shader. */
@ -1791,13 +1792,13 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
*
* For SPIR-V shaders, use the following resource sets:
*
* - 0: Read-only storage textures, followed by read-only storage buffers
* - 0: Sampled textures, followed by read-only storage textures, followed by read-only storage buffers
* - 1: Write-only storage textures, followed by write-only storage buffers
* - 2: Uniform buffers
*
* For DXBC Shader Model 5_0 shaders, use the following register order:
*
* - t registers: Read-only storage textures, followed by read-only storage
* - t registers: Sampled textures, followed by read-only storage textures, followed by read-only storage
* buffers
* - u registers: Write-only storage textures, followed by write-only storage
* buffers
@ -1805,7 +1806,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
*
* For DXIL shaders, use the following register order:
*
* - (t[n], space0): Read-only storage textures, followed by read-only storage
* - (t[n], space0): Sampled textures, followed by read-only storage textures, followed by read-only storage
* buffers
* - (u[n], space1): Write-only storage textures, followed by write-only
* storage buffers
@ -1815,7 +1816,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
*
* - [[buffer]]: Uniform buffers, followed by read-only storage buffers,
* followed by write-only storage buffers
* - [[texture]]: Read-only storage textures, followed by write-only storage
* - [[texture]]: Sampled textures, followed by read-only storage textures, followed by write-only storage
* textures
*
* \param device a GPU Context.
@ -2757,6 +2758,24 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline(
SDL_GPUComputePass *compute_pass,
SDL_GPUComputePipeline *compute_pipeline);
/**
* Binds texture-sampler pairs for use on the compute shader.
*
* The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER.
*
* If compute_pass is NULL, or if texture_sampler_bindings is NULL while
* num_bindings is greater than zero, an invalid-parameter error is reported
* and nothing is bound.
*
* \param compute_pass a compute pass handle.
* \param first_slot the compute sampler slot to begin binding from.
* \param texture_sampler_bindings an array of texture-sampler binding structs.
* \param num_bindings the number of texture-sampler bindings to bind from the array.
*
* \since This function is available since SDL 3.0.0
*/
extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers(
SDL_GPUComputePass *compute_pass,
Uint32 first_slot,
const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
Uint32 num_bindings);
/**
* Binds storage textures as readonly for use on the compute pipeline.
*

View file

@ -29,6 +29,7 @@ SDL3_0.0.0 {
SDL_BindAudioStream;
SDL_BindAudioStreams;
SDL_BindGPUComputePipeline;
SDL_BindGPUComputeSamplers;
SDL_BindGPUComputeStorageBuffers;
SDL_BindGPUComputeStorageTextures;
SDL_BindGPUFragmentSamplers;

View file

@ -54,6 +54,7 @@
#define SDL_BindAudioStream SDL_BindAudioStream_REAL
#define SDL_BindAudioStreams SDL_BindAudioStreams_REAL
#define SDL_BindGPUComputePipeline SDL_BindGPUComputePipeline_REAL
#define SDL_BindGPUComputeSamplers SDL_BindGPUComputeSamplers_REAL
#define SDL_BindGPUComputeStorageBuffers SDL_BindGPUComputeStorageBuffers_REAL
#define SDL_BindGPUComputeStorageTextures SDL_BindGPUComputeStorageTextures_REAL
#define SDL_BindGPUFragmentSamplers SDL_BindGPUFragmentSamplers_REAL

View file

@ -74,6 +74,7 @@ SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer
SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)
SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStreams,(SDL_AudioDeviceID a, SDL_AudioStream **b, int c),(a,b,c),return)
SDL_DYNAPI_PROC(void,SDL_BindGPUComputePipeline,(SDL_GPUComputePass *a, SDL_GPUComputePipeline *b),(a,b),)
SDL_DYNAPI_PROC(void,SDL_BindGPUComputeSamplers,(SDL_GPUComputePass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)
SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageBuffers,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUBuffer *const *c, Uint32 d),(a,b,c,d),)
SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageTextures,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUTexture *const *c, Uint32 d),(a,b,c,d),)
SDL_DYNAPI_PROC(void,SDL_BindGPUFragmentSamplers,(SDL_GPURenderPass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)

View file

@ -1782,6 +1782,32 @@ void SDL_BindGPUComputePipeline(
commandBufferHeader->compute_pipeline_bound = true;
}
/*
 * Public entry point for binding texture-sampler pairs on a compute pass.
 *
 * Rejects a NULL pass, and a NULL binding array paired with a non-zero
 * count, via SDL_InvalidParamError. Otherwise the call is forwarded
 * unchanged to the active backend's BindComputeSamplers hook.
 */
void SDL_BindGPUComputeSamplers(
    SDL_GPUComputePass *compute_pass,
    Uint32 first_slot,
    const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
    Uint32 num_bindings)
{
    if (!compute_pass) {
        SDL_InvalidParamError("compute_pass");
        return;
    }

    // A NULL array is only acceptable when there is nothing to read from it.
    if (num_bindings > 0 && !texture_sampler_bindings) {
        SDL_InvalidParamError("texture_sampler_bindings");
        return;
    }

    // Extra validation runs only when the device was created in debug mode.
    // CHECK_COMPUTEPASS is a macro defined outside this view.
    if (COMPUTEPASS_DEVICE->debug_mode) {
        CHECK_COMPUTEPASS
    }

    // Hand off to the backend implementation.
    COMPUTEPASS_DEVICE->BindComputeSamplers(
        COMPUTEPASS_COMMAND_BUFFER,
        first_slot,
        texture_sampler_bindings,
        num_bindings);
}
void SDL_BindGPUComputeStorageTextures(
SDL_GPUComputePass *compute_pass,
Uint32 first_slot,

View file

@ -511,6 +511,12 @@ struct SDL_GPUDevice
SDL_GPUCommandBuffer *commandBuffer,
SDL_GPUComputePipeline *computePipeline);
void (*BindComputeSamplers)(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
Uint32 numBindings);
void (*BindComputeStorageTextures)(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
@ -740,6 +746,7 @@ struct SDL_GPUDevice
ASSIGN_DRIVER_FUNC(EndRenderPass, name) \
ASSIGN_DRIVER_FUNC(BeginComputePass, name) \
ASSIGN_DRIVER_FUNC(BindComputePipeline, name) \
ASSIGN_DRIVER_FUNC(BindComputeSamplers, name) \
ASSIGN_DRIVER_FUNC(BindComputeStorageTextures, name) \
ASSIGN_DRIVER_FUNC(BindComputeStorageBuffers, name) \
ASSIGN_DRIVER_FUNC(PushComputeUniformData, name) \

View file

@ -414,13 +414,13 @@ typedef struct D3D11TextureContainer
TextureCommonHeader header;
D3D11Texture *activeTexture;
bool canBeCycled;
Uint32 textureCapacity;
Uint32 textureCount;
D3D11Texture **textures;
char *debugName;
bool canBeCycled;
} D3D11TextureContainer;
typedef struct D3D11TextureSubresource
@ -523,6 +523,7 @@ typedef struct D3D11ComputePipeline
{
ID3D11ComputeShader *computeShader;
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numWriteonlyStorageTextures;
Uint32 numReadonlyStorageBuffers;
@ -609,6 +610,11 @@ typedef struct D3D11UniformBuffer
Uint32 currentBlockSize;
} D3D11UniformBuffer;
// Backend sampler object: a thin wrapper around the native D3D11 sampler state.
typedef struct D3D11Sampler
{
ID3D11SamplerState *handle; // native sampler state; copied into the arrays passed to VS/PS/CSSetSamplers at bind time
} D3D11Sampler;
typedef struct D3D11Renderer D3D11Renderer;
typedef struct D3D11CommandBuffer
@ -646,35 +652,40 @@ typedef struct D3D11CommandBuffer
bool needVertexBufferBind;
bool needVertexSamplerBind;
bool needVertexResourceBind;
bool needVertexStorageTextureBind;
bool needVertexStorageBufferBind;
bool needVertexUniformBufferBind;
bool needFragmentSamplerBind;
bool needFragmentResourceBind;
bool needFragmentStorageTextureBind;
bool needFragmentStorageBufferBind;
bool needFragmentUniformBufferBind;
bool needComputeUAVBind;
bool needComputeSRVBind;
bool needComputeSamplerBind;
bool needComputeReadOnlyTextureBind;
bool needComputeReadOnlyBufferBind;
bool needComputeUniformBufferBind;
ID3D11Buffer *vertexBuffers[MAX_BUFFER_BINDINGS];
Uint32 vertexBufferOffsets[MAX_BUFFER_BINDINGS];
Uint32 vertexBufferCount;
ID3D11SamplerState *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11ShaderResourceView *vertexShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
MAX_STORAGE_BUFFERS_PER_STAGE +
MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Texture *vertexSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Sampler *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
ID3D11SamplerState *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11ShaderResourceView *fragmentShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
MAX_STORAGE_BUFFERS_PER_STAGE +
MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Texture *fragmentSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Sampler *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
ID3D11ShaderResourceView *computeShaderResourceViews[MAX_STORAGE_TEXTURES_PER_STAGE +
MAX_STORAGE_BUFFERS_PER_STAGE];
ID3D11UnorderedAccessView *computeUnorderedAccessViews[MAX_COMPUTE_WRITE_TEXTURES +
MAX_COMPUTE_WRITE_BUFFERS];
D3D11Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
// Uniform buffers
D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@ -703,11 +714,6 @@ typedef struct D3D11CommandBuffer
Uint32 usedUniformBufferCapacity;
} D3D11CommandBuffer;
typedef struct D3D11Sampler
{
ID3D11SamplerState *handle;
} D3D11Sampler;
struct D3D11Renderer
{
ID3D11Device1 *device;
@ -772,20 +778,19 @@ struct D3D11Renderer
SDL_Mutex *acquireUniformBufferLock;
SDL_Mutex *fenceLock;
SDL_Mutex *windowLock;
};
// Null arrays for resetting shader resource slots
// Null arrays for resetting resource slots
ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE +
ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 +
MAX_STORAGE_TEXTURES_PER_STAGE +
MAX_STORAGE_BUFFERS_PER_STAGE];
ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2];
ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
MAX_COMPUTE_WRITE_BUFFERS];
};
// Logging
@ -1527,6 +1532,7 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
pipeline = SDL_malloc(sizeof(D3D11ComputePipeline));
pipeline->computeShader = shader;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
@ -3235,21 +3241,31 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
}
commandBuffer->needVertexSamplerBind = true;
commandBuffer->needVertexResourceBind = true;
commandBuffer->needVertexStorageTextureBind = true;
commandBuffer->needVertexStorageBufferBind = true;
commandBuffer->needVertexUniformBufferBind = true;
commandBuffer->needFragmentSamplerBind = true;
commandBuffer->needFragmentResourceBind = true;
commandBuffer->needFragmentStorageTextureBind = true;
commandBuffer->needFragmentStorageBufferBind = true;
commandBuffer->needFragmentUniformBufferBind = true;
commandBuffer->needComputeUAVBind = true;
commandBuffer->needComputeSRVBind = true;
commandBuffer->needComputeUniformBufferBind = true;
SDL_zeroa(commandBuffer->vertexSamplers);
SDL_zeroa(commandBuffer->vertexShaderResourceViews);
SDL_zeroa(commandBuffer->vertexSamplerTextures);
SDL_zeroa(commandBuffer->vertexStorageTextures);
SDL_zeroa(commandBuffer->vertexStorageBuffers);
SDL_zeroa(commandBuffer->fragmentSamplers);
SDL_zeroa(commandBuffer->fragmentShaderResourceViews);
SDL_zeroa(commandBuffer->computeShaderResourceViews);
SDL_zeroa(commandBuffer->computeUnorderedAccessViews);
SDL_zeroa(commandBuffer->fragmentSamplerTextures);
SDL_zeroa(commandBuffer->fragmentStorageTextures);
SDL_zeroa(commandBuffer->fragmentStorageBuffers);
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
D3D11_INTERNAL_AcquireFence(commandBuffer);
commandBuffer->autoReleaseFence = 1;
@ -3489,11 +3505,6 @@ static void D3D11_BeginRenderPass(
SDL_GPUViewport viewport;
SDL_Rect scissorRect;
d3d11CommandBuffer->needVertexSamplerBind = true;
d3d11CommandBuffer->needVertexResourceBind = true;
d3d11CommandBuffer->needFragmentSamplerBind = true;
d3d11CommandBuffer->needFragmentResourceBind = true;
// Clear the bound targets for the current command buffer
for (Uint32 i = 0; i < MAX_COLOR_TARGET_BINDINGS; i += 1) {
d3d11CommandBuffer->colorTargetResolveTexture[i] = NULL;
@ -3717,8 +3728,14 @@ static void D3D11_BindGraphicsPipeline(
}
}
// Mark that uniform bindings are needed
// Mark that bindings are needed
d3d11CommandBuffer->needVertexSamplerBind = true;
d3d11CommandBuffer->needVertexStorageTextureBind = true;
d3d11CommandBuffer->needVertexStorageBufferBind = true;
d3d11CommandBuffer->needVertexUniformBufferBind = true;
d3d11CommandBuffer->needFragmentSamplerBind = true;
d3d11CommandBuffer->needFragmentStorageTextureBind = true;
d3d11CommandBuffer->needFragmentStorageBufferBind = true;
d3d11CommandBuffer->needFragmentUniformBufferBind = true;
}
@ -3776,14 +3793,13 @@ static void D3D11_BindVertexSamplers(
textureContainer->activeTexture);
d3d11CommandBuffer->vertexSamplers[firstSlot + i] =
((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
(D3D11Sampler *)textureSamplerBindings[i].sampler;
d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i] =
textureContainer->activeTexture->shaderView;
d3d11CommandBuffer->vertexSamplerTextures[firstSlot + i] =
textureContainer->activeTexture;
}
d3d11CommandBuffer->needVertexSamplerBind = true;
d3d11CommandBuffer->needVertexResourceBind = true;
}
static void D3D11_BindVertexStorageTextures(
@ -3801,11 +3817,11 @@ static void D3D11_BindVertexStorageTextures(
d3d11CommandBuffer,
textureContainer->activeTexture);
d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount] = textureContainer->activeTexture->shaderView;
d3d11CommandBuffer->vertexStorageTextures[firstSlot + i] =
textureContainer->activeTexture;
}
d3d11CommandBuffer->needVertexResourceBind = true;
d3d11CommandBuffer->needVertexStorageTextureBind = true;
}
static void D3D11_BindVertexStorageBuffers(
@ -3825,12 +3841,11 @@ static void D3D11_BindVertexStorageBuffers(
d3d11CommandBuffer,
bufferContainer->activeBuffer);
d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount +
d3d11CommandBuffer->graphicsPipeline->vertexStorageTextureCount] = bufferContainer->activeBuffer->srv;
d3d11CommandBuffer->vertexStorageBuffers[firstSlot + i] =
bufferContainer->activeBuffer;
}
d3d11CommandBuffer->needVertexResourceBind = true;
d3d11CommandBuffer->needVertexStorageBufferBind = true;
}
static void D3D11_BindFragmentSamplers(
@ -3849,14 +3864,13 @@ static void D3D11_BindFragmentSamplers(
textureContainer->activeTexture);
d3d11CommandBuffer->fragmentSamplers[firstSlot + i] =
((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
(D3D11Sampler *)textureSamplerBindings[i].sampler;
d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i] =
textureContainer->activeTexture->shaderView;
d3d11CommandBuffer->fragmentSamplerTextures[firstSlot + i] =
(D3D11Texture *)textureContainer->activeTexture;
}
d3d11CommandBuffer->needFragmentSamplerBind = true;
d3d11CommandBuffer->needFragmentResourceBind = true;
}
static void D3D11_BindFragmentStorageTextures(
@ -3874,11 +3888,11 @@ static void D3D11_BindFragmentStorageTextures(
d3d11CommandBuffer,
textureContainer->activeTexture);
d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount] = textureContainer->activeTexture->shaderView;
d3d11CommandBuffer->fragmentStorageTextures[firstSlot + i] =
textureContainer->activeTexture;
}
d3d11CommandBuffer->needFragmentResourceBind = true;
d3d11CommandBuffer->needFragmentStorageTextureBind = true;
}
static void D3D11_BindFragmentStorageBuffers(
@ -3898,12 +3912,11 @@ static void D3D11_BindFragmentStorageBuffers(
d3d11CommandBuffer,
bufferContainer->activeBuffer);
d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount +
d3d11CommandBuffer->graphicsPipeline->fragmentStorageTextureCount] = bufferContainer->activeBuffer->srv;
d3d11CommandBuffer->fragmentStorageBuffers[firstSlot + i] =
bufferContainer->activeBuffer;
}
d3d11CommandBuffer->needFragmentResourceBind = true;
d3d11CommandBuffer->needFragmentStorageBufferBind = true;
}
static void D3D11_INTERNAL_BindGraphicsResources(
@ -3911,18 +3924,8 @@ static void D3D11_INTERNAL_BindGraphicsResources(
{
D3D11GraphicsPipeline *graphicsPipeline = commandBuffer->graphicsPipeline;
Uint32 vertexResourceCount =
graphicsPipeline->vertexSamplerCount +
graphicsPipeline->vertexStorageTextureCount +
graphicsPipeline->vertexStorageBufferCount;
Uint32 fragmentResourceCount =
graphicsPipeline->fragmentSamplerCount +
graphicsPipeline->fragmentStorageTextureCount +
graphicsPipeline->fragmentStorageBufferCount;
ID3D11Buffer *nullBuf = NULL;
Uint32 offsetInConstants, blockSizeInConstants, i;
Uint32 offsetInConstants, blockSizeInConstants;
if (commandBuffer->needVertexBufferBind) {
ID3D11DeviceContext_IASetVertexBuffers(
@ -3936,30 +3939,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
if (commandBuffer->needVertexSamplerBind) {
if (graphicsPipeline->vertexSamplerCount > 0) {
ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->vertexSamplerCount; i += 1) {
samplerStates[i] = commandBuffer->vertexSamplers[i]->handle;
srvs[i] = commandBuffer->vertexSamplerTextures[i]->shaderView;
}
ID3D11DeviceContext_VSSetSamplers(
commandBuffer->context,
0,
graphicsPipeline->vertexSamplerCount,
commandBuffer->vertexSamplers);
samplerStates);
ID3D11DeviceContext_VSSetShaderResources(
commandBuffer->context,
0,
graphicsPipeline->vertexSamplerCount,
srvs);
}
commandBuffer->needVertexSamplerBind = false;
}
if (commandBuffer->needVertexResourceBind) {
if (vertexResourceCount > 0) {
ID3D11DeviceContext_VSSetShaderResources(
commandBuffer->context,
0,
vertexResourceCount,
commandBuffer->vertexShaderResourceViews);
if (commandBuffer->needVertexStorageTextureBind) {
if (graphicsPipeline->vertexStorageTextureCount > 0) {
ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->vertexStorageTextureCount; i += 1) {
srvs[i] = commandBuffer->vertexStorageTextures[i]->shaderView;
}
commandBuffer->needVertexResourceBind = false;
ID3D11DeviceContext_VSSetShaderResources(
commandBuffer->context,
graphicsPipeline->vertexSamplerCount,
graphicsPipeline->vertexStorageTextureCount,
srvs);
}
commandBuffer->needVertexStorageTextureBind = false;
}
if (commandBuffer->needVertexStorageBufferBind) {
if (graphicsPipeline->vertexStorageBufferCount > 0) {
ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->vertexStorageBufferCount; i += 1) {
srvs[i] = commandBuffer->vertexStorageBuffers[i]->srv;
}
ID3D11DeviceContext_VSSetShaderResources(
commandBuffer->context,
graphicsPipeline->vertexSamplerCount + graphicsPipeline->vertexStorageTextureCount,
graphicsPipeline->vertexStorageBufferCount,
srvs);
}
commandBuffer->needVertexStorageBufferBind = false;
}
if (commandBuffer->needVertexUniformBufferBind) {
for (i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
for (Uint32 i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
/* stupid workaround for god awful D3D11 drivers
* see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation
*/
@ -3986,30 +4027,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
if (commandBuffer->needFragmentSamplerBind) {
if (graphicsPipeline->fragmentSamplerCount > 0) {
ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->fragmentSamplerCount; i += 1) {
samplerStates[i] = commandBuffer->fragmentSamplers[i]->handle;
srvs[i] = commandBuffer->fragmentSamplerTextures[i]->shaderView;
}
ID3D11DeviceContext_PSSetSamplers(
commandBuffer->context,
0,
graphicsPipeline->fragmentSamplerCount,
commandBuffer->fragmentSamplers);
samplerStates);
ID3D11DeviceContext_PSSetShaderResources(
commandBuffer->context,
0,
graphicsPipeline->fragmentSamplerCount,
srvs);
}
commandBuffer->needFragmentSamplerBind = false;
}
if (commandBuffer->needFragmentResourceBind) {
if (fragmentResourceCount > 0) {
ID3D11DeviceContext_PSSetShaderResources(
commandBuffer->context,
0,
fragmentResourceCount,
commandBuffer->fragmentShaderResourceViews);
if (commandBuffer->needFragmentStorageTextureBind) {
if (graphicsPipeline->fragmentStorageTextureCount > 0) {
ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageTextureCount; i += 1) {
srvs[i] = commandBuffer->fragmentStorageTextures[i]->shaderView;
}
commandBuffer->needFragmentResourceBind = false;
ID3D11DeviceContext_PSSetShaderResources(
commandBuffer->context,
graphicsPipeline->fragmentSamplerCount,
graphicsPipeline->fragmentStorageTextureCount,
srvs);
}
commandBuffer->needFragmentStorageTextureBind = false;
}
if (commandBuffer->needFragmentStorageBufferBind) {
if (graphicsPipeline->fragmentStorageBufferCount > 0) {
ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageBufferCount; i += 1) {
srvs[i] = commandBuffer->fragmentStorageBuffers[i]->srv;
}
ID3D11DeviceContext_PSSetShaderResources(
commandBuffer->context,
graphicsPipeline->fragmentSamplerCount + graphicsPipeline->fragmentStorageTextureCount,
graphicsPipeline->fragmentStorageBufferCount,
srvs);
}
commandBuffer->needFragmentStorageBufferBind = false;
}
if (commandBuffer->needFragmentUniformBufferBind) {
for (i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
for (Uint32 i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
/* stupid workaround for god awful D3D11 drivers
* see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-pssetconstantbuffers1#calling-pssetconstantbuffers1-with-command-list-emulation
*/
@ -4127,6 +4206,7 @@ static void D3D11_EndRenderPass(
SDL_GPUCommandBuffer *commandBuffer)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
Uint32 i;
// Set render target slots to NULL to avoid NULL set behavior
@ -4134,7 +4214,7 @@ static void D3D11_EndRenderPass(
ID3D11DeviceContext_OMSetRenderTargets(
d3d11CommandBuffer->context,
MAX_COLOR_TARGET_BINDINGS,
nullRTVs,
renderer->nullRTVs,
NULL);
// Resolve MSAA color render targets
@ -4150,16 +4230,44 @@ static void D3D11_EndRenderPass(
}
}
ID3D11DeviceContext_VSSetSamplers(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE,
renderer->nullSamplers);
ID3D11DeviceContext_VSSetShaderResources(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
renderer->nullSRVs);
ID3D11DeviceContext_PSSetSamplers(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE,
renderer->nullSamplers);
ID3D11DeviceContext_PSSetShaderResources(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
renderer->nullSRVs);
// Reset bind state
SDL_zeroa(d3d11CommandBuffer->vertexBuffers);
SDL_zeroa(d3d11CommandBuffer->vertexBufferOffsets);
d3d11CommandBuffer->vertexBufferCount = 0;
SDL_zeroa(d3d11CommandBuffer->vertexSamplers);
SDL_zeroa(d3d11CommandBuffer->vertexShaderResourceViews);
SDL_zeroa(d3d11CommandBuffer->vertexSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->vertexStorageTextures);
SDL_zeroa(d3d11CommandBuffer->vertexStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->fragmentSamplers);
SDL_zeroa(d3d11CommandBuffer->fragmentShaderResourceViews);
SDL_zeroa(d3d11CommandBuffer->fragmentSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->fragmentStorageTextures);
SDL_zeroa(d3d11CommandBuffer->fragmentStorageBuffers);
}
static void D3D11_PushVertexUniformData(
@ -4229,13 +4337,10 @@ static void D3D11_BeginComputePass(
D3D11TextureSubresource *textureSubresource;
D3D11BufferContainer *bufferContainer;
D3D11Buffer *buffer;
Uint32 i;
ID3D11UnorderedAccessView *uavs[MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS];
for (i = 0; i < numStorageTextureBindings; i += 1) {
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
textureContainer = (D3D11TextureContainer *)storageTextureBindings[i].texture;
if (!(textureContainer->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
}
textureSubresource = D3D11_INTERNAL_PrepareTextureSubresourceForWrite(
d3d11CommandBuffer->renderer,
@ -4248,10 +4353,10 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
textureSubresource->parent);
d3d11CommandBuffer->computeUnorderedAccessViews[i] = textureSubresource->uav;
d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource;
}
for (i = 0; i < numStorageBufferBindings; i += 1) {
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
bufferContainer = (D3D11BufferContainer *)storageBufferBindings[i].buffer;
buffer = D3D11_INTERNAL_PrepareBufferForWrite(
@ -4263,10 +4368,23 @@ static void D3D11_BeginComputePass(
d3d11CommandBuffer,
buffer);
d3d11CommandBuffer->computeUnorderedAccessViews[i + numStorageTextureBindings] = buffer->uav;
d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
}
d3d11CommandBuffer->needComputeUAVBind = true;
for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav;
}
for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav;
}
ID3D11DeviceContext_CSSetUnorderedAccessViews(
d3d11CommandBuffer->context,
0,
numStorageTextureBindings + numStorageBufferBindings,
uavs,
NULL);
}
static void D3D11_BindComputePipeline(
@ -4292,9 +4410,37 @@ static void D3D11_BindComputePipeline(
}
}
d3d11CommandBuffer->needComputeSamplerBind = true;
d3d11CommandBuffer->needComputeReadOnlyTextureBind = true;
d3d11CommandBuffer->needComputeReadOnlyBufferBind = true;
d3d11CommandBuffer->needComputeUniformBufferBind = true;
}
/*
 * Records texture-sampler pairs on the command buffer for the compute stage.
 *
 * Nothing is sent to the D3D11 context here. The sampler and texture
 * pointers are stored in the command buffer's compute slots, starting at
 * firstSlot, and needComputeSamplerBind is raised so they can be applied
 * later.
 */
static void D3D11_BindComputeSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 numBindings)
{
    D3D11CommandBuffer *cmdbuf = (D3D11CommandBuffer *)commandBuffer;

    for (Uint32 idx = 0; idx < numBindings; idx += 1) {
        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[idx];
        D3D11TextureContainer *container = (D3D11TextureContainer *)binding->texture;
        const Uint32 slot = firstSlot + idx;

        // Register the container's active texture with this command buffer.
        D3D11_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);

        cmdbuf->computeSamplers[slot] = (D3D11Sampler *)binding->sampler;
        cmdbuf->computeSamplerTextures[slot] = container->activeTexture;
    }

    // Mark the compute sampler state dirty, even when numBindings is zero.
    cmdbuf->needComputeSamplerBind = true;
}
static void D3D11_BindComputeStorageTextures(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
@ -4310,11 +4456,11 @@ static void D3D11_BindComputeStorageTextures(
d3d11CommandBuffer,
textureContainer->activeTexture);
d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i] =
textureContainer->activeTexture->shaderView;
d3d11CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] =
textureContainer->activeTexture;
}
d3d11CommandBuffer->needComputeSRVBind = true;
d3d11CommandBuffer->needComputeReadOnlyTextureBind = true;
}
static void D3D11_BindComputeStorageBuffers(
@ -4334,11 +4480,11 @@ static void D3D11_BindComputeStorageBuffers(
d3d11CommandBuffer,
bufferContainer->activeBuffer);
d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i +
d3d11CommandBuffer->computePipeline->numReadonlyStorageTextures] = bufferContainer->activeBuffer->srv;
d3d11CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] =
bufferContainer->activeBuffer;
}
d3d11CommandBuffer->needComputeSRVBind = true;
d3d11CommandBuffer->needComputeReadOnlyBufferBind = true;
}
static void D3D11_PushComputeUniformData(
@ -4360,40 +4506,73 @@ static void D3D11_INTERNAL_BindComputeResources(
{
D3D11ComputePipeline *computePipeline = commandBuffer->computePipeline;
Uint32 readOnlyResourceCount =
computePipeline->numReadonlyStorageTextures +
computePipeline->numReadonlyStorageBuffers;
Uint32 writeOnlyResourceCount =
computePipeline->numWriteonlyStorageTextures +
computePipeline->numWriteonlyStorageBuffers;
ID3D11Buffer *nullBuf = NULL;
Uint32 offsetInConstants, blockSizeInConstants, i;
Uint32 offsetInConstants, blockSizeInConstants;
if (commandBuffer->needComputeUAVBind) {
ID3D11DeviceContext_CSSetUnorderedAccessViews(
commandBuffer->context,
0,
writeOnlyResourceCount,
commandBuffer->computeUnorderedAccessViews,
NULL);
if (commandBuffer->needComputeSamplerBind) {
if (computePipeline->numSamplers > 0) {
ID3D11SamplerState *samplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
commandBuffer->needComputeUAVBind = false;
for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
samplers[i] = commandBuffer->computeSamplers[i]->handle;
srvs[i] = commandBuffer->computeSamplerTextures[i]->shaderView;
}
if (commandBuffer->needComputeSRVBind) {
ID3D11DeviceContext_CSSetSamplers(
commandBuffer->context,
0,
computePipeline->numSamplers,
samplers);
ID3D11DeviceContext_CSSetShaderResources(
commandBuffer->context,
0,
readOnlyResourceCount,
commandBuffer->computeShaderResourceViews);
computePipeline->numSamplers,
srvs);
}
commandBuffer->needComputeSRVBind = false;
commandBuffer->needComputeSamplerBind = false;
}
if (commandBuffer->needComputeReadOnlyTextureBind) {
if (computePipeline->numReadonlyStorageTextures > 0) {
ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
for (Uint32 i = 0; i < computePipeline->numReadonlyStorageTextures; i += 1) {
srvs[i] = commandBuffer->computeReadOnlyStorageTextures[i]->shaderView;
}
ID3D11DeviceContext_CSSetShaderResources(
commandBuffer->context,
computePipeline->numSamplers,
computePipeline->numReadonlyStorageTextures,
srvs);
}
commandBuffer->needComputeReadOnlyTextureBind = false;
}
// Flush pending read-only storage buffer bindings for the compute stage.
if (commandBuffer->needComputeReadOnlyBufferBind) {
    if (computePipeline->numReadonlyStorageBuffers > 0) {
        // Scratch array of buffer SRVs. It is sized by the storage *buffer*
        // limit (not the storage texture limit) so it always covers
        // numReadonlyStorageBuffers, matching the vertex/fragment paths.
        ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];

        for (Uint32 i = 0; i < computePipeline->numReadonlyStorageBuffers; i += 1) {
            srvs[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srv;
        }

        // Buffer SRVs occupy the t-registers after the sampled textures
        // and the read-only storage textures.
        ID3D11DeviceContext_CSSetShaderResources(
            commandBuffer->context,
            computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures,
            computePipeline->numReadonlyStorageBuffers,
            srvs);
    }

    commandBuffer->needComputeReadOnlyBufferBind = false;
}
if (commandBuffer->needComputeUniformBufferBind) {
for (i = 0; i < computePipeline->numUniformBuffers; i += 1) {
for (Uint32 i = 0; i < computePipeline->numUniformBuffers; i += 1) {
/* stupid workaround for god awful D3D11 drivers
* see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation
*/
@ -4456,6 +4635,7 @@ static void D3D11_EndComputePass(
SDL_GPUCommandBuffer *commandBuffer)
{
D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
// reset UAV slots to avoid NULL set behavior
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-cssetshaderresources
@ -4463,14 +4643,30 @@ static void D3D11_EndComputePass(
d3d11CommandBuffer->context,
0,
MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS,
nullUAVs,
renderer->nullUAVs,
NULL);
ID3D11DeviceContext_CSSetSamplers(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE,
renderer->nullSamplers);
ID3D11DeviceContext_CSSetShaderResources(
d3d11CommandBuffer->context,
0,
MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
renderer->nullSRVs);
d3d11CommandBuffer->computePipeline = NULL;
// Reset bind state
SDL_zeroa(d3d11CommandBuffer->computeUnorderedAccessViews);
SDL_zeroa(d3d11CommandBuffer->computeShaderResourceViews);
SDL_zeroa(d3d11CommandBuffer->computeSamplers);
SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources);
SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers);
}
// Fence Cleanup
@ -6185,10 +6381,10 @@ tryCreateDevice:
// Initialize null states
SDL_zeroa(nullRTVs);
SDL_zeroa(nullSRVs);
SDL_zeroa(nullSamplers);
SDL_zeroa(nullUAVs);
SDL_zeroa(renderer->nullRTVs);
SDL_zeroa(renderer->nullSRVs);
SDL_zeroa(renderer->nullSamplers);
SDL_zeroa(renderer->nullUAVs);
// Initialize built-in pipelines
D3D11_INTERNAL_InitBlitPipelines(renderer);

View file

@ -694,6 +694,7 @@ struct D3D12CommandBuffer
bool needFragmentStorageBufferBind;
bool needFragmentUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
bool needComputeSamplerBind;
bool needComputeReadOnlyStorageTextureBind;
bool needComputeReadOnlyStorageBufferBind;
bool needComputeUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
@ -714,6 +715,8 @@ struct D3D12CommandBuffer
D3D12Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D12UniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
D3D12Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
@ -805,6 +808,8 @@ typedef struct D3D12ComputeRootSignature
{
ID3D12RootSignature *handle;
Sint32 samplerRootIndex;
Sint32 samplerTextureRootIndex;
Sint32 readOnlyStorageTextureRootIndex;
Sint32 readOnlyStorageBufferRootIndex;
Sint32 writeOnlyStorageTextureRootIndex;
@ -817,10 +822,11 @@ struct D3D12ComputePipeline
ID3D12PipelineState *pipelineState;
D3D12ComputeRootSignature *rootSignature;
Uint32 num_readonly_storage_textures;
Uint32 num_readonly_storage_buffers;
Uint32 num_writeonly_storage_textures;
Uint32 num_writeonly_storage_buffers;
Uint32 numSamplers;
Uint32 numReadOnlyStorageTextures;
Uint32 numReadOnlyStorageBuffers;
Uint32 numWriteOnlyStorageTextures;
Uint32 numWriteOnlyStorageBuffers;
Uint32 numUniformBuffers;
SDL_AtomicInt referenceCount;
@ -2145,6 +2151,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
SDL_zeroa(descriptorRanges);
SDL_zero(rootParameter);
d3d12ComputeRootSignature->samplerRootIndex = -1;
d3d12ComputeRootSignature->samplerTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1;
d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1;
d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1;
@ -2154,10 +2162,44 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1;
}
if (createInfo->num_samplers) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
descriptorRange.NumDescriptors = createInfo->num_samplers;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.RegisterSpace = 0;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParameter.DescriptorTable.NumDescriptorRanges = 1;
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->samplerRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
descriptorRange.NumDescriptors = createInfo->num_samplers;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.RegisterSpace = 0;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParameter.DescriptorTable.NumDescriptorRanges = 1;
rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
rootParameters[parameterCount] = rootParameter;
d3d12ComputeRootSignature->samplerTextureRootIndex = parameterCount;
rangeCount += 1;
parameterCount += 1;
}
if (createInfo->num_readonly_storage_textures) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
descriptorRange.NumDescriptors = createInfo->num_readonly_storage_textures;
descriptorRange.BaseShaderRegister = 0;
descriptorRange.BaseShaderRegister = createInfo->num_samplers;
descriptorRange.RegisterSpace = 0;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
@ -2175,7 +2217,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
if (createInfo->num_readonly_storage_buffers) {
descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
descriptorRange.NumDescriptors = createInfo->num_readonly_storage_buffers;
descriptorRange.BaseShaderRegister = createInfo->num_readonly_storage_textures;
descriptorRange.BaseShaderRegister = createInfo->num_samplers + createInfo->num_readonly_storage_textures;
descriptorRange.RegisterSpace = 0;
descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
descriptorRanges[rangeCount] = descriptorRange;
@ -2346,10 +2388,11 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline(
computePipeline->pipelineState = pipelineState;
computePipeline->rootSignature = rootSignature;
computePipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures;
computePipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers;
computePipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures;
computePipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers;
computePipeline->numSamplers = createinfo->num_samplers;
computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures;
computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers;
computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures;
computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
computePipeline->numUniformBuffers = createinfo->num_uniform_buffers;
SDL_AtomicSet(&computePipeline->referenceCount, 0);
@ -4889,6 +4932,7 @@ static void D3D12_BindComputePipeline(
d3d12CommandBuffer->currentComputePipeline = pipeline;
d3d12CommandBuffer->needComputeSamplerBind = true;
d3d12CommandBuffer->needComputeReadOnlyStorageTextureBind = true;
d3d12CommandBuffer->needComputeReadOnlyStorageBufferBind = true;
@ -4906,8 +4950,8 @@ static void D3D12_BindComputePipeline(
D3D12_INTERNAL_TrackComputePipeline(d3d12CommandBuffer, pipeline);
// Bind write-only resources after setting root signature
if (pipeline->num_writeonly_storage_textures > 0) {
for (Uint32 i = 0; i < pipeline->num_writeonly_storage_textures; i += 1) {
if (pipeline->numWriteOnlyStorageTextures > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageTextures; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uavHandle.cpuHandle;
}
@ -4924,8 +4968,8 @@ static void D3D12_BindComputePipeline(
gpuDescriptorHandle);
}
if (pipeline->num_writeonly_storage_buffers > 0) {
for (Uint32 i = 0; i < pipeline->num_writeonly_storage_buffers; i += 1) {
if (pipeline->numWriteOnlyStorageBuffers > 0) {
for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageBuffers; i += 1) {
cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]->uavDescriptor.cpuHandle;
}
@ -4943,6 +4987,32 @@ static void D3D12_BindComputePipeline(
}
}
// Stages texture-sampler pairs for the compute stage of a D3D12 command buffer.
//
// commandBuffer:          cast to D3D12CommandBuffer below.
// firstSlot:              first compute sampler slot to overwrite.
// textureSamplerBindings: numBindings texture/sampler pairs, consumed in order.
// numBindings:            number of pairs; with zero only the dirty flag is set.
//
// Each pair's sampler and the container's active texture are tracked on the
// command buffer and stored at slot (firstSlot + index). This function does
// not range-check the slots it writes.
static void D3D12_BindComputeSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 numBindings)
{
    D3D12CommandBuffer *cmd = (D3D12CommandBuffer *)commandBuffer;
    Uint32 bindingIndex;

    for (bindingIndex = 0; bindingIndex < numBindings; bindingIndex += 1) {
        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[bindingIndex];
        D3D12TextureContainer *container = (D3D12TextureContainer *)binding->texture;
        D3D12Sampler *sampler = (D3D12Sampler *)binding->sampler;
        const Uint32 slot = firstSlot + bindingIndex;

        D3D12_INTERNAL_TrackSampler(cmd, sampler);
        D3D12_INTERNAL_TrackTexture(cmd, container->activeTexture);

        cmd->computeSamplerTextures[slot] = container->activeTexture;
        cmd->computeSamplers[slot] = sampler;
    }

    // Mark the sampler state dirty so it is re-bound before it is next used.
    cmd->needComputeSamplerBind = true;
}
static void D3D12_BindComputeStorageTextures(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
@ -5033,9 +5103,46 @@ static void D3D12_INTERNAL_BindComputeResources(
D3D12_CPU_DESCRIPTOR_HANDLE cpuHandles[MAX_TEXTURE_SAMPLERS_PER_STAGE];
D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptorHandle;
if (commandBuffer->needComputeSamplerBind) {
if (computePipeline->numSamplers > 0) {
for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
cpuHandles[i] = commandBuffer->computeSamplers[i]->handle.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
commandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
cpuHandles,
computePipeline->numSamplers,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
commandBuffer->graphicsCommandList,
computePipeline->rootSignature->samplerRootIndex,
gpuDescriptorHandle);
for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
cpuHandles[i] = commandBuffer->computeSamplerTextures[i]->srvHandle.cpuHandle;
}
D3D12_INTERNAL_WriteGPUDescriptors(
commandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
computePipeline->numSamplers,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
commandBuffer->graphicsCommandList,
computePipeline->rootSignature->samplerTextureRootIndex,
gpuDescriptorHandle);
}
commandBuffer->needComputeSamplerBind = false;
}
if (commandBuffer->needComputeReadOnlyStorageTextureBind) {
if (computePipeline->num_readonly_storage_textures > 0) {
for (Uint32 i = 0; i < computePipeline->num_readonly_storage_textures; i += 1) {
if (computePipeline->numReadOnlyStorageTextures > 0) {
for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageTextures; i += 1) {
cpuHandles[i] = commandBuffer->computeReadOnlyStorageTextures[i]->srvHandle.cpuHandle;
}
@ -5043,7 +5150,7 @@ static void D3D12_INTERNAL_BindComputeResources(
commandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
computePipeline->num_readonly_storage_textures,
computePipeline->numReadOnlyStorageTextures,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
@ -5055,8 +5162,8 @@ static void D3D12_INTERNAL_BindComputeResources(
}
if (commandBuffer->needComputeReadOnlyStorageBufferBind) {
if (computePipeline->num_readonly_storage_buffers > 0) {
for (Uint32 i = 0; i < computePipeline->num_readonly_storage_buffers; i += 1) {
if (computePipeline->numReadOnlyStorageBuffers > 0) {
for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageBuffers; i += 1) {
cpuHandles[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srvDescriptor.cpuHandle;
}
@ -5064,7 +5171,7 @@ static void D3D12_INTERNAL_BindComputeResources(
commandBuffer,
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
cpuHandles,
computePipeline->num_readonly_storage_buffers,
computePipeline->numReadOnlyStorageBuffers,
&gpuDescriptorHandle);
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
@ -5174,6 +5281,9 @@ static void D3D12_EndComputePass(
}
}
SDL_zeroa(d3d12CommandBuffer->computeSamplerTextures);
SDL_zeroa(d3d12CommandBuffer->computeSamplers);
d3d12CommandBuffer->currentComputePipeline = NULL;
}
@ -6751,6 +6861,8 @@ static SDL_GPUCommandBuffer *D3D12_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->fragmentStorageBuffers);
SDL_zeroa(commandBuffer->fragmentUniformBuffers);
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);

View file

@ -403,7 +403,7 @@ typedef struct MetalShader
id<MTLLibrary> library;
id<MTLFunction> function;
Uint32 num_samplers;
Uint32 numSamplers;
Uint32 numUniformBuffers;
Uint32 numStorageBuffers;
Uint32 numStorageTextures;
@ -434,14 +434,15 @@ typedef struct MetalGraphicsPipeline
typedef struct MetalComputePipeline
{
id<MTLComputePipelineState> handle;
Uint32 num_readonly_storage_textures;
Uint32 num_writeonly_storage_textures;
Uint32 num_readonly_storage_buffers;
Uint32 num_writeonly_storage_buffers;
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numWriteonlyStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageBuffers;
Uint32 numUniformBuffers;
Uint32 threadcount_x;
Uint32 threadcount_y;
Uint32 threadcount_z;
Uint32 threadcountX;
Uint32 threadcountY;
Uint32 threadcountZ;
} MetalComputePipeline;
typedef struct MetalBuffer
@ -511,6 +512,7 @@ typedef struct MetalCommandBuffer
bool needFragmentStorageBufferBind;
bool needFragmentUniformBind;
bool needComputeSamplerBind;
bool needComputeTextureBind;
bool needComputeBufferBind;
bool needComputeUniformBind;
@ -525,6 +527,8 @@ typedef struct MetalCommandBuffer
id<MTLTexture> fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
id<MTLBuffer> fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
id<MTLTexture> computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
id<MTLSamplerState> computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
id<MTLTexture> computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
id<MTLBuffer> computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
id<MTLTexture> computeWriteOnlyTextures[MAX_COMPUTE_WRITE_TEXTURES];
@ -984,14 +988,15 @@ static SDL_GPUComputePipeline *METAL_CreateComputePipeline(
pipeline = SDL_calloc(1, sizeof(MetalComputePipeline));
pipeline->handle = handle;
pipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures;
pipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures;
pipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers;
pipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers;
pipeline->numSamplers = createinfo->num_samplers;
pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
pipeline->threadcount_x = createinfo->threadcount_x;
pipeline->threadcount_y = createinfo->threadcount_y;
pipeline->threadcount_z = createinfo->threadcount_z;
pipeline->threadcountX = createinfo->threadcount_x;
pipeline->threadcountY = createinfo->threadcount_y;
pipeline->threadcountZ = createinfo->threadcount_z;
return (SDL_GPUComputePipeline *)pipeline;
}
@ -1123,11 +1128,11 @@ static SDL_GPUGraphicsPipeline *METAL_CreateGraphicsPipeline(
result->depth_stencil_state = depthStencilState;
result->rasterizerState = createinfo->rasterizer_state;
result->primitiveType = createinfo->primitive_type;
result->vertexSamplerCount = vertexShader->num_samplers;
result->vertexSamplerCount = vertexShader->numSamplers;
result->vertexUniformBufferCount = vertexShader->numUniformBuffers;
result->vertexStorageBufferCount = vertexShader->numStorageBuffers;
result->vertexStorageTextureCount = vertexShader->numStorageTextures;
result->fragmentSamplerCount = fragmentShader->num_samplers;
result->fragmentSamplerCount = fragmentShader->numSamplers;
result->fragmentUniformBufferCount = fragmentShader->numUniformBuffers;
result->fragmentStorageBufferCount = fragmentShader->numStorageBuffers;
result->fragmentStorageTextureCount = fragmentShader->numStorageTextures;
@ -1309,7 +1314,7 @@ static SDL_GPUShader *METAL_CreateShader(
result = SDL_calloc(1, sizeof(MetalShader));
result->library = libraryFunction.library;
result->function = libraryFunction.function;
result->num_samplers = createinfo->num_samplers;
result->numSamplers = createinfo->num_samplers;
result->numStorageBuffers = createinfo->num_storage_buffers;
result->numStorageTextures = createinfo->num_storage_textures;
result->numUniformBuffers = createinfo->num_uniform_buffers;
@ -2042,6 +2047,7 @@ static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer(
commandBuffer->needFragmentStorageTextureBind = true;
commandBuffer->needFragmentStorageBufferBind = true;
commandBuffer->needFragmentUniformBind = true;
commandBuffer->needComputeSamplerBind = true;
commandBuffer->needComputeBufferBind = true;
commandBuffer->needComputeTextureBind = true;
commandBuffer->needComputeUniformBind = true;
@ -2627,41 +2633,54 @@ static void METAL_INTERNAL_BindComputeResources(
MetalCommandBuffer *commandBuffer)
{
MetalComputePipeline *computePipeline = commandBuffer->compute_pipeline;
NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; // 8 is the max for both read and write-only
NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 };
if (commandBuffer->needComputeSamplerBind) {
// Bind sampler textures
if (computePipeline->numSamplers > 0) {
[commandBuffer->computeEncoder setTextures:commandBuffer->computeSamplerTextures
withRange:NSMakeRange(0, computePipeline->numSamplers)];
[commandBuffer->computeEncoder setSamplerStates:commandBuffer->computeSamplers
withRange:NSMakeRange(0, computePipeline->numSamplers)];
}
commandBuffer->needComputeSamplerBind = false;
}
if (commandBuffer->needComputeTextureBind) {
// Bind read-only textures
if (computePipeline->num_readonly_storage_textures > 0) {
if (computePipeline->numReadonlyStorageTextures > 0) {
[commandBuffer->computeEncoder setTextures:commandBuffer->computeReadOnlyTextures
withRange:NSMakeRange(0, computePipeline->num_readonly_storage_textures)];
withRange:NSMakeRange(
computePipeline->numSamplers,
computePipeline->numReadonlyStorageTextures)];
}
// Bind write-only textures
if (computePipeline->num_writeonly_storage_textures > 0) {
if (computePipeline->numWriteonlyStorageTextures > 0) {
[commandBuffer->computeEncoder setTextures:commandBuffer->computeWriteOnlyTextures
withRange:NSMakeRange(
computePipeline->num_readonly_storage_textures,
computePipeline->num_writeonly_storage_textures)];
computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures,
computePipeline->numWriteonlyStorageTextures)];
}
commandBuffer->needComputeTextureBind = false;
}
if (commandBuffer->needComputeBufferBind) {
// Bind read-only buffers
if (computePipeline->num_readonly_storage_buffers > 0) {
if (computePipeline->numReadonlyStorageBuffers > 0) {
[commandBuffer->computeEncoder setBuffers:commandBuffer->computeReadOnlyBuffers
offsets:offsets
withRange:NSMakeRange(computePipeline->numUniformBuffers,
computePipeline->num_readonly_storage_buffers)];
computePipeline->numReadonlyStorageBuffers)];
}
// Bind write-only buffers
if (computePipeline->num_writeonly_storage_buffers > 0) {
if (computePipeline->numWriteonlyStorageBuffers > 0) {
[commandBuffer->computeEncoder setBuffers:commandBuffer->computeWriteOnlyBuffers
offsets:offsets
withRange:NSMakeRange(
computePipeline->numUniformBuffers +
computePipeline->num_readonly_storage_buffers,
computePipeline->num_writeonly_storage_buffers)];
computePipeline->numReadonlyStorageBuffers,
computePipeline->numWriteonlyStorageBuffers)];
}
commandBuffer->needComputeBufferBind = false;
}
@ -3020,6 +3039,32 @@ static void METAL_BindComputePipeline(
}
}
// Stages texture-sampler pairs for the compute stage of a Metal command buffer.
//
// commandBuffer:          cast to MetalCommandBuffer below.
// firstSlot:              first compute sampler slot to overwrite.
// textureSamplerBindings: numBindings texture/sampler pairs, consumed in order.
// numBindings:            number of pairs; with zero only the dirty flag is set.
//
// For each pair the container's active texture is tracked on the command
// buffer, then the sampler handle and texture handle are stored at slot
// (firstSlot + index). This function does not range-check the slots it writes.
static void METAL_BindComputeSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 numBindings)
{
    MetalCommandBuffer *cmd = (MetalCommandBuffer *)commandBuffer;
    Uint32 bindingIndex;

    for (bindingIndex = 0; bindingIndex < numBindings; bindingIndex += 1) {
        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[bindingIndex];
        MetalTextureContainer *container = (MetalTextureContainer *)binding->texture;
        MetalSampler *sampler = (MetalSampler *)binding->sampler;
        const Uint32 slot = firstSlot + bindingIndex;

        METAL_INTERNAL_TrackTexture(cmd, container->activeTexture);

        cmd->computeSamplers[slot] = sampler->handle;
        cmd->computeSamplerTextures[slot] = container->activeTexture->handle;
    }

    // Mark the sampler state dirty so it is re-bound before it is next used.
    cmd->needComputeSamplerBind = true;
}
static void METAL_BindComputeStorageTextures(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
@ -3092,9 +3137,9 @@ static void METAL_DispatchCompute(
MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
MTLSize threadgroups = MTLSizeMake(groupcountX, groupcountY, groupcountZ);
MTLSize threadsPerThreadgroup = MTLSizeMake(
metalCommandBuffer->compute_pipeline->threadcount_x,
metalCommandBuffer->compute_pipeline->threadcount_y,
metalCommandBuffer->compute_pipeline->threadcount_z);
metalCommandBuffer->compute_pipeline->threadcountX,
metalCommandBuffer->compute_pipeline->threadcountY,
metalCommandBuffer->compute_pipeline->threadcountZ);
METAL_INTERNAL_BindComputeResources(metalCommandBuffer);
@ -3113,9 +3158,9 @@ static void METAL_DispatchComputeIndirect(
MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer;
MTLSize threadsPerThreadgroup = MTLSizeMake(
metalCommandBuffer->compute_pipeline->threadcount_x,
metalCommandBuffer->compute_pipeline->threadcount_y,
metalCommandBuffer->compute_pipeline->threadcount_z);
metalCommandBuffer->compute_pipeline->threadcountX,
metalCommandBuffer->compute_pipeline->threadcountY,
metalCommandBuffer->compute_pipeline->threadcountZ);
METAL_INTERNAL_BindComputeResources(metalCommandBuffer);
@ -3136,6 +3181,10 @@ static void METAL_EndComputePass(
[metalCommandBuffer->computeEncoder endEncoding];
metalCommandBuffer->computeEncoder = nil;
for (Uint32 i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) {
metalCommandBuffer->computeSamplers[i] = nil;
metalCommandBuffer->computeSamplerTextures[i] = nil;
}
for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) {
metalCommandBuffer->computeWriteOnlyTextures[i] = nil;
}
@ -3223,6 +3272,8 @@ static void METAL_INTERNAL_CleanCommandBuffer(
commandBuffer->vertexTextures[i] = nil;
commandBuffer->fragmentSamplers[i] = nil;
commandBuffer->fragmentTextures[i] = nil;
commandBuffer->computeSamplers[i] = nil;
commandBuffer->computeSamplerTextures[i] = nil;
}
for (i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) {
commandBuffer->vertexStorageTextures[i] = nil;

View file

@ -891,12 +891,13 @@ typedef struct VulkanComputePipelineResourceLayout
/*
* Descriptor set layout is as follows:
* 0: read-only textures, then read-only buffers
* 0: samplers, then read-only textures, then read-only buffers
* 1: write-only textures, then write-only buffers
* 2: uniform buffers
*/
DescriptorSetPool descriptorSetPools[3];
Uint32 numSamplers;
Uint32 numReadonlyStorageTextures;
Uint32 numReadonlyStorageBuffers;
Uint32 numWriteonlyStorageTextures;
@ -1055,6 +1056,8 @@ typedef struct VulkanCommandBuffer
Uint32 writeOnlyComputeStorageTextureSubresourceCount;
VulkanBuffer *writeOnlyComputeStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
VulkanTexture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
VulkanSampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
VulkanTexture *readOnlyComputeStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
VulkanBuffer *readOnlyComputeStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
@ -3870,6 +3873,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
VkResult vulkanResult;
Uint32 i;
pipelineResourceLayout->numSamplers = createinfo->num_samplers;
pipelineResourceLayout->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
pipelineResourceLayout->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
pipelineResourceLayout->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
@ -3883,6 +3887,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
descriptorSetLayoutCreateInfo.flags = 0;
descriptorSetLayoutCreateInfo.pBindings = NULL;
descriptorSetLayoutCreateInfo.bindingCount =
createinfo->num_samplers +
createinfo->num_readonly_storage_textures +
createinfo->num_readonly_storage_buffers;
@ -3895,7 +3900,18 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
descriptorSetPool->descriptorInfos = SDL_malloc(
descriptorSetPool->descriptorInfoCount * sizeof(VulkanDescriptorInfo));
for (i = 0; i < createinfo->num_readonly_storage_textures; i += 1) {
for (i = 0; i < createinfo->num_samplers; i += 1) {
descriptorSetLayoutBindings[i].binding = i;
descriptorSetLayoutBindings[i].descriptorCount = 1;
descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptorSetLayoutBindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
descriptorSetLayoutBindings[i].pImmutableSamplers = NULL;
descriptorSetPool->descriptorInfos[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT;
}
for (i = createinfo->num_samplers; i < createinfo->num_samplers + createinfo->num_readonly_storage_textures; i += 1) {
descriptorSetLayoutBindings[i].binding = i;
descriptorSetLayoutBindings[i].descriptorCount = 1;
descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
@ -3906,7 +3922,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT;
}
for (i = createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) {
for (i = createinfo->num_samplers + createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) {
descriptorSetLayoutBindings[i].binding = i;
descriptorSetLayoutBindings[i].descriptorCount = 1;
descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@ -8368,6 +8384,31 @@ static void VULKAN_BindComputePipeline(
vulkanCommandBuffer->needNewComputeUniformOffsets = true;
}
// Stages texture-sampler pairs for the compute stage of a Vulkan command buffer.
//
// commandBuffer:          cast to VulkanCommandBuffer below.
// firstSlot:              first compute sampler slot to overwrite.
// textureSamplerBindings: numBindings texture/sampler pairs, consumed in order.
// numBindings:            number of pairs; with zero only the dirty flag is set.
//
// Each pair's texture and sampler are stored at slot (firstSlot + index) and
// then tracked on the command buffer. This function does not range-check the
// slots it writes.
static void VULKAN_BindComputeSamplers(
    SDL_GPUCommandBuffer *commandBuffer,
    Uint32 firstSlot,
    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
    Uint32 numBindings)
{
    VulkanCommandBuffer *cmd = (VulkanCommandBuffer *)commandBuffer;
    Uint32 bindingIndex;

    for (bindingIndex = 0; bindingIndex < numBindings; bindingIndex += 1) {
        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[bindingIndex];
        VulkanTextureContainer *container = (VulkanTextureContainer *)binding->texture;
        VulkanSampler *sampler = (VulkanSampler *)binding->sampler;
        const Uint32 slot = firstSlot + bindingIndex;

        cmd->computeSamplerTextures[slot] = container->activeTextureHandle->vulkanTexture;
        cmd->computeSamplers[slot] = sampler;

        VULKAN_INTERNAL_TrackSampler(cmd, sampler);
        VULKAN_INTERNAL_TrackTexture(cmd, container->activeTextureHandle->vulkanTexture);
    }

    // Compute samplers are written into the read-only descriptor set, so
    // request a new one.
    cmd->needNewComputeReadOnlyDescriptorSet = true;
}
static void VULKAN_BindComputeStorageTextures(
SDL_GPUCommandBuffer *commandBuffer,
Uint32 firstSlot,
@ -8468,7 +8509,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
VkWriteDescriptorSet *currentWriteDescriptorSet;
DescriptorSetPool *descriptorSetPool;
VkDescriptorBufferInfo bufferInfos[MAX_STORAGE_BUFFERS_PER_STAGE]; // 8 is max for both read and write
VkDescriptorImageInfo imageInfos[MAX_STORAGE_TEXTURES_PER_STAGE]; // 8 is max for both read and write
VkDescriptorImageInfo imageInfos[MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE];
Uint32 dynamicOffsets[MAX_UNIFORM_BUFFERS_PER_STAGE];
Uint32 bufferInfoCount = 0;
Uint32 imageInfoCount = 0;
@ -8486,9 +8527,31 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
writeDescriptorSets = SDL_stack_alloc(
VkWriteDescriptorSet,
resourceLayout->numSamplers +
resourceLayout->numReadonlyStorageTextures +
resourceLayout->numReadonlyStorageBuffers);
for (i = 0; i < resourceLayout->numSamplers; i += 1) {
currentWriteDescriptorSet = &writeDescriptorSets[i];
currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
currentWriteDescriptorSet->pNext = NULL;
currentWriteDescriptorSet->descriptorCount = 1;
currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
currentWriteDescriptorSet->dstArrayElement = 0;
currentWriteDescriptorSet->dstBinding = i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
currentWriteDescriptorSet->pTexelBufferView = NULL;
currentWriteDescriptorSet->pBufferInfo = NULL;
imageInfos[imageInfoCount].sampler = commandBuffer->computeSamplers[i]->sampler;
imageInfos[imageInfoCount].imageView = commandBuffer->computeSamplerTextures[i]->fullView;
imageInfos[imageInfoCount].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
currentWriteDescriptorSet->pImageInfo = &imageInfos[imageInfoCount];
imageInfoCount += 1;
}
for (i = 0; i < resourceLayout->numReadonlyStorageTextures; i += 1) {
currentWriteDescriptorSet = &writeDescriptorSets[i];
currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -8496,7 +8559,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
currentWriteDescriptorSet->descriptorCount = 1;
currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
currentWriteDescriptorSet->dstArrayElement = 0;
currentWriteDescriptorSet->dstBinding = i;
currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
currentWriteDescriptorSet->pTexelBufferView = NULL;
currentWriteDescriptorSet->pBufferInfo = NULL;
@ -8518,7 +8581,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
currentWriteDescriptorSet->descriptorCount = 1;
currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
currentWriteDescriptorSet->dstArrayElement = 0;
currentWriteDescriptorSet->dstBinding = resourceLayout->numReadonlyStorageTextures + i;
currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + i;
currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
currentWriteDescriptorSet->pTexelBufferView = NULL;
currentWriteDescriptorSet->pImageInfo = NULL;
@ -8534,7 +8597,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
renderer->vkUpdateDescriptorSets(
renderer->logicalDevice,
resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers,
resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers,
writeDescriptorSets,
0,
NULL);
@ -8794,6 +8857,10 @@ static void VULKAN_EndComputePass(
}
}
// we don't need a barrier because sampler state is always the default if sampler bit is set
SDL_zeroa(vulkanCommandBuffer->computeSamplerTextures);
SDL_zeroa(vulkanCommandBuffer->computeSamplers);
vulkanCommandBuffer->currentComputePipeline = NULL;
vulkanCommandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE;
@ -9676,6 +9743,8 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer(
SDL_zeroa(commandBuffer->writeOnlyComputeStorageTextureSubresources);
commandBuffer->writeOnlyComputeStorageTextureSubresourceCount = 0;
SDL_zeroa(commandBuffer->writeOnlyComputeStorageBuffers);
SDL_zeroa(commandBuffer->computeSamplerTextures);
SDL_zeroa(commandBuffer->computeSamplers);
SDL_zeroa(commandBuffer->readOnlyComputeStorageTextures);
SDL_zeroa(commandBuffer->readOnlyComputeStorageBuffers);