Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,17 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf
compileOptions->release();
#endif

// Copy the optional indirect-dispatch settings from the user-supplied configuration.
// Nothing is copied when indirectDispatch is 0 (direct dispatch only).
if (inputLaunchConfiguration.indirectDispatch != 0) {
	app->configuration.indirectDispatch = inputLaunchConfiguration.indirectDispatch;
	// Take the offset straight from the input configuration. The previous code tested the
	// destination field (app->configuration.indirectBufferOffset) before copying, so a
	// user-provided offset was discarded whenever the destination was still zero.
	app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset;
#if(VKFFT_BACKEND==0)
	// indirectBuffer and indirectHostPointer are only declared for the Vulkan backend.
	app->configuration.indirectBuffer = inputLaunchConfiguration.indirectBuffer;
	app->configuration.indirectHostPointer = inputLaunchConfiguration.indirectHostPointer;
#endif
}
resFFT = initializeBluesteinAutoPadding(app);
if (resFFT != VKFFT_SUCCESS) {
deleteVkFFT(app);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ static inline void appendInputLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc
if (sc->inputBufferBlockNum == 1) {
sc->tempLen = sprintf(sc->tempStr, "\
layout(std430, binding = %d) buffer DataIn{\n\
%s inputs[%" PRIu64 "];\n\
};\n\n", id, inputMemoryType->name, sc->inputBufferBlockSize / typeSize);
%s inputs[];\n\
};\n\n", id, inputMemoryType->name); // use runtime-sized arrays so that the same shader can be reused for different batch numbers


PfAppendLine(sc);
}
else {
Expand All @@ -70,9 +72,11 @@ static inline void appendOutputLayoutVkFFT(VkFFTSpecializationConstantsLayout* s
if (sc->inputBufferBlockNum == 1) {
sc->tempLen = sprintf(sc->tempStr, "\
layout(std430, binding = %d) buffer DataOut{\n\
%s outputs[%" PRIu64 "];\n\
};\n\n", id, outputMemoryType->name, sc->outputBufferBlockSize / typeSize);
PfAppendLine(sc);
%s outputs[];\n\
};\n\n", id, outputMemoryType->name); // use runtime-sized arrays so that the same shader can be reused for different batch numbers


PfAppendLine(sc);
}
else {
sc->tempLen = sprintf(sc->tempStr, "\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,28 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a
if (axis->pushConstants.structSize > 0) {
vkCmdPushConstants(app->configuration.commandBuffer[0], axis->pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, (uint32_t)axis->pushConstants.structSize, axis->pushConstants.data);
}
vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1], (uint32_t)dispatchSize[2]);
bool indirect_dispatch;
if (axis->specializationConstants.inverse){
indirect_dispatch = ((app->configuration.indirectDispatch & 0x2) && (app->configuration.indirectHostPointer != nullptr));
}
else{
indirect_dispatch = ((app->configuration.indirectDispatch & 0x1) && (app->configuration.indirectHostPointer != nullptr));
}

pfUINT indirect_offset;
if (indirect_dispatch){
unsigned int* host_indirect = (unsigned int*)((char*)app->configuration.indirectHostPointer + app->configuration.indirectBufferOffset + app->indirectDispatchID*16);
host_indirect[0] = (uint32_t)dispatchSize[0];
host_indirect[1] = (uint32_t)dispatchSize[1];
host_indirect[2] = (uint32_t)dispatchSize[2];
host_indirect[3] = axis->batchWorkGroup + 10000*axis->specializationConstants.inverse;
indirect_offset = app->configuration.indirectBufferOffset + 16*app->indirectDispatchID;
vkCmdDispatchIndirect(app->configuration.commandBuffer[0], app->configuration.indirectBuffer, indirect_offset);
app->indirectDispatchID++;
}
else {
vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1], (uint32_t)dispatchSize[2]);
}
#elif(VKFFT_BACKEND==1)
void* args[10];
CUresult result = CUDA_SUCCESS;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V
descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.numBuffersBound[0] + axis->specializationConstants.numBuffersBound[1]);
#endif
if ((axis_id == (app->configuration.FFTdim-1)) && (axis_upload_id == 0) && (app->configuration.performConvolution)) {
axis->specializationConstants.convolutionBindingID = (int)axis->numBindings;
axis->specializationConstants.numBuffersBound[axis->numBindings] = (int)axis->specializationConstants.kernelBlockNum;
#if(VKFFT_BACKEND==0)
descriptorPoolSize.descriptorCount += (uint32_t)axis->specializationConstants.kernelBlockNum;
Expand All @@ -571,6 +572,7 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V
}

if (app->configuration.useLUT == 1) {
axis->specializationConstants.LUTBindingID = (int)axis->numBindings;
axis->specializationConstants.numBuffersBound[axis->numBindings] = 1;
#if(VKFFT_BACKEND==0)
descriptorPoolSize.descriptorCount++;
Expand Down Expand Up @@ -774,6 +776,7 @@ static inline VkFFTResult VkFFTCheckUpdateBufferSet(VkFFTApplication* app, VkFFT
return VKFFT_SUCCESS;
}
static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) {

if (axis->specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) {
axis->specializationConstants.inputOffset.type = 31;
axis->specializationConstants.outputOffset.type = 31;
Expand Down Expand Up @@ -1198,6 +1201,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan*
return VKFFT_SUCCESS;
}
static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) {

if (axis->specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) {
#if(VKFFT_BACKEND==0)
const VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
Expand Down Expand Up @@ -1518,7 +1522,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA
}
}
}
if ((i == 2) && (app->configuration.performConvolution)) {
if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) {
if (axis->specializationConstants.performBufferSetUpdate) {
pfUINT bufferId = 0;
pfUINT offset = j;
Expand All @@ -1544,7 +1548,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA
axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset;
}
}
if ((i == axis->numBindings - 1) && (app->configuration.useLUT == 1)) {
if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) {
#if(VKFFT_BACKEND==0)
if (axis->specializationConstants.performBufferSetUpdate) {
descriptorBufferInfo.buffer = axis->bufferLUT;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla
#elif(VKFFT_BACKEND==5)
#endif
VkFFTAxis* axis = (reverseBluesteinMultiUpload) ? &FFTPlan->inverseBluesteinAxes[axis_id][axis_upload_id] : &FFTPlan->axes[axis_id][axis_upload_id];

axis->batchWorkGroup = 1;
axis->specializationConstants.sourceFFTSize.type = 31;
axis->specializationConstants.sourceFFTSize.data.i = app->configuration.size[axis_id];
axis->specializationConstants.axis_id = (int)axis_id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication*
#elif(VKFFT_BACKEND==5)
#endif
VkFFTAxis* axis = &FFTPlan->R2Cdecomposition;
axis->batchWorkGroup = 2;
axis->specializationConstants.sourceFFTSize.type = 31;
axis->specializationConstants.sourceFFTSize.data.i = (pfINT)app->configuration.size[0];
axis->specializationConstants.numFFTdims = (int)app->configuration.FFTdim;
Expand Down
7 changes: 6 additions & 1 deletion vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ typedef struct {
VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled
VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled
VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled
VkBuffer indirectBuffer; //buffer that contains the workgroup counts for indirect dispatch. Size should be at least 4 x 4 bytes x the number of dispatches (one 16-byte record per dispatch).
unsigned int* indirectHostPointer; // pointer to the array with the indirect workgroup counts on the host side. During dispatch this array is filled by VkFFT and can later be updated by the user. Format is uint[4] = {x_size, y_size, z_size, id}, with id the axis that contains the batch number (0=x, 1=y, 2=z), plus 10000 if the dispatch concerns an inverse FFT.
#elif(VKFFT_BACKEND==1)
void** buffer;//pointer to device buffer used for computations
void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation
Expand Down Expand Up @@ -186,6 +188,7 @@ typedef struct {
pfUINT inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0
pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0
pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0
pfUINT indirectBufferOffset; //specify if VkFFT has to offset the first element position inside the indirectBuffer. In bytes. Default 0
pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0

//optional: (default 0 if not stated otherwise)
Expand Down Expand Up @@ -321,6 +324,7 @@ typedef struct {
MTL::CommandBuffer* commandBuffer;//Filled at app execution
MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution
#endif
pfUINT indirectDispatch; //bitmask selecting indirect dispatch: 0 - direct dispatch only, 1 - forward FFT indirect, 2 - inverse FFT indirect, 3 - both forward and inverse indirect
} VkFFTConfiguration;//parameters specified at plan creation

typedef struct {
Expand Down Expand Up @@ -1113,6 +1117,7 @@ typedef struct {
pfUINT bufferLUTSize;
pfUINT bufferRaderUintLUTSize;
pfUINT referenceLUT;
pfUINT batchWorkGroup; //id value written into the fourth word of this axis' indirect dispatch record (plus 10000 for an inverse transform)
} VkFFTAxis;

typedef struct {
Expand Down Expand Up @@ -1177,7 +1182,7 @@ typedef struct {
pfUINT bufferBluesteinSize[VKFFT_MAX_FFT_DIMENSIONS];
void* applicationBluesteinString[VKFFT_MAX_FFT_DIMENSIONS];
pfUINT applicationBluesteinStringSize[VKFFT_MAX_FFT_DIMENSIONS];

pfUINT indirectDispatchID; //index of the next 16-byte record used for indirect dispatch; incremented after each indirect dispatch is recorded
pfUINT numRaderFFTPrimes;
pfUINT rader_primes[30];
pfUINT rader_buffer_size[30];
Expand Down