Skip to content

Commit 5a7435a

Browse files
committed
switch to using unsynchronized buffer access for raw data
1 parent 65f7c33 commit 5a7435a

File tree

2 files changed

+52
-23
lines changed

2 files changed

+52
-23
lines changed

beamformer.c

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ upload_filter_coefficients(BeamformerCtx *ctx, Arena a)
5353
static void
5454
alloc_shader_storage(BeamformerCtx *ctx, Arena a)
5555
{
56+
ComputeShaderCtx *cs = &ctx->csctx;
5657
BeamformerParameters *bp = &ctx->params->raw;
5758
uv4 dec_data_dim = bp->dec_data_dim;
5859
uv2 rf_raw_dim = bp->rf_raw_dim;
@@ -61,35 +62,38 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
6162
ctx->csctx.rf_raw_dim = rf_raw_dim;
6263
ctx->csctx.dec_data_dim = dec_data_dim;
6364

64-
glDeleteBuffers(ARRAY_COUNT(ctx->csctx.rf_data_ssbos), ctx->csctx.rf_data_ssbos);
65-
glDeleteBuffers(1, &ctx->csctx.raw_data_ssbo);
66-
glGenBuffers(1, &ctx->csctx.raw_data_ssbo);
67-
glGenBuffers(ARRAY_COUNT(ctx->csctx.rf_data_ssbos), ctx->csctx.rf_data_ssbos);
68-
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.raw_data_ssbo);
69-
glBufferStorage(GL_SHADER_STORAGE_BUFFER, rf_raw_size, 0,
70-
GL_DYNAMIC_STORAGE_BIT|GL_MAP_WRITE_BIT);
65+
glDeleteBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
66+
glGenBuffers(ARRAY_COUNT(cs->rf_data_ssbos), cs->rf_data_ssbos);
7167

72-
for (u32 i = 0; i < ARRAY_COUNT(ctx->csctx.rf_data_ssbos); i++) {
73-
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.rf_data_ssbos[i]);
68+
glDeleteBuffers(1, &cs->raw_data_ssbo);
69+
glGenBuffers(1, &cs->raw_data_ssbo);
70+
71+
glBindBuffer(GL_SHADER_STORAGE_BUFFER, cs->raw_data_ssbo);
72+
glBufferStorage(GL_SHADER_STORAGE_BUFFER, ARRAY_COUNT(cs->raw_data_fences) * rf_raw_size,
73+
0, GL_DYNAMIC_STORAGE_BIT|GL_MAP_WRITE_BIT);
74+
75+
for (u32 i = 0; i < ARRAY_COUNT(cs->rf_data_ssbos); i++) {
76+
glBindBuffer(GL_SHADER_STORAGE_BUFFER, cs->rf_data_ssbos[i]);
7477
glBufferStorage(GL_SHADER_STORAGE_BUFFER, rf_decoded_size, 0, 0);
7578
}
7679

7780
/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
78-
ctx->csctx.hadamard_dim = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z};
79-
size hadamard_elements = dec_data_dim.z * dec_data_dim.z;
80-
i32 *hadamard = alloc(&a, i32, hadamard_elements);
81+
cs->hadamard_dim = (uv2){.x = dec_data_dim.z, .y = dec_data_dim.z};
82+
size hadamard_elements = dec_data_dim.z * dec_data_dim.z;
83+
i32 *hadamard = alloc(&a, i32, hadamard_elements);
8184
fill_hadamard(hadamard, dec_data_dim.z);
8285

83-
rlUnloadShaderBuffer(ctx->csctx.hadamard_ssbo);
84-
ctx->csctx.hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard,
85-
GL_STATIC_DRAW);
86-
ctx->flags &= ~ALLOC_SSBOS;
86+
rlUnloadShaderBuffer(cs->hadamard_ssbo);
87+
cs->hadamard_ssbo = rlLoadShaderBuffer(hadamard_elements * sizeof(i32), hadamard, GL_STATIC_DRAW);
88+
ctx->flags &= ~ALLOC_SSBOS;
8789
}
8890

8991
static void
9092
do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
9193
{
9294
ComputeShaderCtx *csctx = &ctx->csctx;
95+
uv2 rf_raw_dim = ctx->params->raw.rf_raw_dim;
96+
size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
9397

9498
glBeginQuery(GL_TIME_ELAPSED, csctx->timer_ids[shader]);
9599

@@ -101,13 +105,17 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
101105
u32 input_ssbo_idx = csctx->last_output_ssbo_index;
102106
switch (shader) {
103107
case CS_HADAMARD:
104-
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo);
108+
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, csctx->raw_data_ssbo,
109+
csctx->raw_data_index * rf_raw_size, rf_raw_size);
110+
105111
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, csctx->rf_data_ssbos[output_ssbo_idx]);
106112
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, csctx->hadamard_ssbo);
107113
glDispatchCompute(ORONE(csctx->dec_data_dim.x / 32),
108114
ORONE(csctx->dec_data_dim.y / 32),
109115
ORONE(csctx->dec_data_dim.z));
116+
csctx->raw_data_fences[csctx->raw_data_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
110117
csctx->last_output_ssbo_index = !csctx->last_output_ssbo_index;
118+
csctx->raw_data_index = (csctx->raw_data_index + 1) % ARRAY_COUNT(csctx->raw_data_fences);
111119
break;
112120
case CS_LPF:
113121
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, csctx->rf_data_ssbos[input_ssbo_idx]);
@@ -531,12 +539,28 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
531539
if (!uv4_equal(ctx->out_data_dim, bp->output_points) || ctx->flags & ALLOC_OUT_TEX)
532540
alloc_output_image(ctx);
533541

534-
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.raw_data_ssbo);
535-
void *rf_data_buf = glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY);
536-
ASSERT(rf_data_buf);
542+
i32 raw_index = ctx->csctx.raw_data_index;
543+
/* NOTE: if this times out it means the command queue is more than 3 frames behind.
544+
* In that case we need to re-evaluate the buffer size */
545+
i32 result = glClientWaitSync(ctx->csctx.raw_data_fences[raw_index], 0, 10000);
546+
if (result == GL_TIMEOUT_EXPIRED) {
547+
//ASSERT(0);
548+
}
549+
glDeleteSync(ctx->csctx.raw_data_fences[raw_index]);
550+
537551
uv2 rf_raw_dim = ctx->csctx.rf_raw_dim;
538552
size rf_raw_size = rf_raw_dim.x * rf_raw_dim.y * sizeof(i16);
539-
size rlen = os_read_pipe_data(ctx->data_pipe, rf_data_buf, rf_raw_size);
553+
554+
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ctx->csctx.raw_data_ssbo);
555+
void *rf_data_buf = glMapBufferRange(GL_SHADER_STORAGE_BUFFER,
556+
raw_index * rf_raw_size, rf_raw_size,
557+
GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_WRITE_BIT);
558+
if (!rf_data_buf) {
559+
rlCheckErrors();
560+
ASSERT(0);
561+
}
562+
size rlen = os_read_pipe_data(ctx->data_pipe, rf_data_buf, rf_raw_size);
563+
540564
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
541565

542566
if (rlen == rf_raw_size) ctx->flags |= DO_COMPUTE;

beamformer.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,13 @@ typedef struct {
9797
GLsync timer_fence;
9898
f32 last_frame_time[CS_LAST];
9999

100-
/* NOTE: One SSBO for raw data and two for decoded data (swapped for chained stages)*/
101-
u32 raw_data_ssbo;
100+
/* NOTE: a single raw data SSBO split into multiple fenced regions for unsynchronized mapping.
101+
* Decoded data is only relevant in the context of a single frame; two are
102+
* used so that they can be swapped when chaining multiple compute stages */
103+
GLsync raw_data_fences[3];
104+
u32 raw_data_ssbo;
105+
u32 raw_data_index;
106+
102107
u32 rf_data_ssbos[2];
103108
u32 last_output_ssbo_index;
104109
u32 hadamard_ssbo;

0 commit comments

Comments
 (0)