@@ -53,6 +53,7 @@ upload_filter_coefficients(BeamformerCtx *ctx, Arena a)
53
53
static void
54
54
alloc_shader_storage (BeamformerCtx * ctx , Arena a )
55
55
{
56
+ ComputeShaderCtx * cs = & ctx -> csctx ;
56
57
BeamformerParameters * bp = & ctx -> params -> raw ;
57
58
uv4 dec_data_dim = bp -> dec_data_dim ;
58
59
uv2 rf_raw_dim = bp -> rf_raw_dim ;
@@ -61,35 +62,38 @@ alloc_shader_storage(BeamformerCtx *ctx, Arena a)
61
62
ctx -> csctx .rf_raw_dim = rf_raw_dim ;
62
63
ctx -> csctx .dec_data_dim = dec_data_dim ;
63
64
64
- glDeleteBuffers (ARRAY_COUNT (ctx -> csctx .rf_data_ssbos ), ctx -> csctx .rf_data_ssbos );
65
- glDeleteBuffers (1 , & ctx -> csctx .raw_data_ssbo );
66
- glGenBuffers (1 , & ctx -> csctx .raw_data_ssbo );
67
- glGenBuffers (ARRAY_COUNT (ctx -> csctx .rf_data_ssbos ), ctx -> csctx .rf_data_ssbos );
68
- glBindBuffer (GL_SHADER_STORAGE_BUFFER , ctx -> csctx .raw_data_ssbo );
69
- glBufferStorage (GL_SHADER_STORAGE_BUFFER , rf_raw_size , 0 ,
70
- GL_DYNAMIC_STORAGE_BIT |GL_MAP_WRITE_BIT );
65
+ glDeleteBuffers (ARRAY_COUNT (cs -> rf_data_ssbos ), cs -> rf_data_ssbos );
66
+ glGenBuffers (ARRAY_COUNT (cs -> rf_data_ssbos ), cs -> rf_data_ssbos );
71
67
72
- for (u32 i = 0 ; i < ARRAY_COUNT (ctx -> csctx .rf_data_ssbos ); i ++ ) {
73
- glBindBuffer (GL_SHADER_STORAGE_BUFFER , ctx -> csctx .rf_data_ssbos [i ]);
68
+ glDeleteBuffers (1 , & cs -> raw_data_ssbo );
69
+ glGenBuffers (1 , & cs -> raw_data_ssbo );
70
+
71
+ glBindBuffer (GL_SHADER_STORAGE_BUFFER , cs -> raw_data_ssbo );
72
+ glBufferStorage (GL_SHADER_STORAGE_BUFFER , ARRAY_COUNT (cs -> raw_data_fences ) * rf_raw_size ,
73
+ 0 , GL_DYNAMIC_STORAGE_BIT |GL_MAP_WRITE_BIT );
74
+
75
+ for (u32 i = 0 ; i < ARRAY_COUNT (cs -> rf_data_ssbos ); i ++ ) {
76
+ glBindBuffer (GL_SHADER_STORAGE_BUFFER , cs -> rf_data_ssbos [i ]);
74
77
glBufferStorage (GL_SHADER_STORAGE_BUFFER , rf_decoded_size , 0 , 0 );
75
78
}
76
79
77
80
/* NOTE: store hadamard in GPU once; it won't change for a particular imaging session */
78
- ctx -> csctx . hadamard_dim = (uv2 ){.x = dec_data_dim .z , .y = dec_data_dim .z };
79
- size hadamard_elements = dec_data_dim .z * dec_data_dim .z ;
80
- i32 * hadamard = alloc (& a , i32 , hadamard_elements );
81
+ cs -> hadamard_dim = (uv2 ){.x = dec_data_dim .z , .y = dec_data_dim .z };
82
+ size hadamard_elements = dec_data_dim .z * dec_data_dim .z ;
83
+ i32 * hadamard = alloc (& a , i32 , hadamard_elements );
81
84
fill_hadamard (hadamard , dec_data_dim .z );
82
85
83
- rlUnloadShaderBuffer (ctx -> csctx .hadamard_ssbo );
84
- ctx -> csctx .hadamard_ssbo = rlLoadShaderBuffer (hadamard_elements * sizeof (i32 ), hadamard ,
85
- GL_STATIC_DRAW );
86
- ctx -> flags &= ~ALLOC_SSBOS ;
86
+ rlUnloadShaderBuffer (cs -> hadamard_ssbo );
87
+ cs -> hadamard_ssbo = rlLoadShaderBuffer (hadamard_elements * sizeof (i32 ), hadamard , GL_STATIC_DRAW );
88
+ ctx -> flags &= ~ALLOC_SSBOS ;
87
89
}
88
90
89
91
static void
90
92
do_compute_shader (BeamformerCtx * ctx , enum compute_shaders shader )
91
93
{
92
94
ComputeShaderCtx * csctx = & ctx -> csctx ;
95
+ uv2 rf_raw_dim = ctx -> params -> raw .rf_raw_dim ;
96
+ size rf_raw_size = rf_raw_dim .x * rf_raw_dim .y * sizeof (i16 );
93
97
94
98
glBeginQuery (GL_TIME_ELAPSED , csctx -> timer_ids [shader ]);
95
99
@@ -101,13 +105,17 @@ do_compute_shader(BeamformerCtx *ctx, enum compute_shaders shader)
101
105
u32 input_ssbo_idx = csctx -> last_output_ssbo_index ;
102
106
switch (shader ) {
103
107
case CS_HADAMARD :
104
- glBindBufferBase (GL_SHADER_STORAGE_BUFFER , 1 , csctx -> raw_data_ssbo );
108
+ glBindBufferRange (GL_SHADER_STORAGE_BUFFER , 1 , csctx -> raw_data_ssbo ,
109
+ csctx -> raw_data_index * rf_raw_size , rf_raw_size );
110
+
105
111
glBindBufferBase (GL_SHADER_STORAGE_BUFFER , 2 , csctx -> rf_data_ssbos [output_ssbo_idx ]);
106
112
glBindBufferBase (GL_SHADER_STORAGE_BUFFER , 3 , csctx -> hadamard_ssbo );
107
113
glDispatchCompute (ORONE (csctx -> dec_data_dim .x / 32 ),
108
114
ORONE (csctx -> dec_data_dim .y / 32 ),
109
115
ORONE (csctx -> dec_data_dim .z ));
116
+ csctx -> raw_data_fences [csctx -> raw_data_index ] = glFenceSync (GL_SYNC_GPU_COMMANDS_COMPLETE , 0 );
110
117
csctx -> last_output_ssbo_index = !csctx -> last_output_ssbo_index ;
118
+ csctx -> raw_data_index = (csctx -> raw_data_index + 1 ) % ARRAY_COUNT (csctx -> raw_data_fences );
111
119
break ;
112
120
case CS_LPF :
113
121
glBindBufferBase (GL_SHADER_STORAGE_BUFFER , 1 , csctx -> rf_data_ssbos [input_ssbo_idx ]);
@@ -531,12 +539,28 @@ do_beamformer(BeamformerCtx *ctx, Arena arena)
531
539
if (!uv4_equal (ctx -> out_data_dim , bp -> output_points ) || ctx -> flags & ALLOC_OUT_TEX )
532
540
alloc_output_image (ctx );
533
541
534
- glBindBuffer (GL_SHADER_STORAGE_BUFFER , ctx -> csctx .raw_data_ssbo );
535
- void * rf_data_buf = glMapBuffer (GL_SHADER_STORAGE_BUFFER , GL_WRITE_ONLY );
536
- ASSERT (rf_data_buf );
542
+ i32 raw_index = ctx -> csctx .raw_data_index ;
543
+ /* NOTE: if this times out it means the command queue is more than 3 frames behind.
544
+ * In that case we need to re-evaluate the buffer size */
545
+ i32 result = glClientWaitSync (ctx -> csctx .raw_data_fences [raw_index ], 0 , 10000 );
546
+ if (result == GL_TIMEOUT_EXPIRED ) {
547
+ //ASSERT(0);
548
+ }
549
+ glDeleteSync (ctx -> csctx .raw_data_fences [raw_index ]);
550
+
537
551
uv2 rf_raw_dim = ctx -> csctx .rf_raw_dim ;
538
552
size rf_raw_size = rf_raw_dim .x * rf_raw_dim .y * sizeof (i16 );
539
- size rlen = os_read_pipe_data (ctx -> data_pipe , rf_data_buf , rf_raw_size );
553
+
554
+ glBindBuffer (GL_SHADER_STORAGE_BUFFER , ctx -> csctx .raw_data_ssbo );
555
+ void * rf_data_buf = glMapBufferRange (GL_SHADER_STORAGE_BUFFER ,
556
+ raw_index * rf_raw_size , rf_raw_size ,
557
+ GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_WRITE_BIT );
558
+ if (!rf_data_buf ) {
559
+ rlCheckErrors ();
560
+ ASSERT (0 );
561
+ }
562
+ size rlen = os_read_pipe_data (ctx -> data_pipe , rf_data_buf , rf_raw_size );
563
+
540
564
glUnmapBuffer (GL_SHADER_STORAGE_BUFFER );
541
565
542
566
if (rlen == rf_raw_size ) ctx -> flags |= DO_COMPUTE ;
0 commit comments