11"""Story generators for the CC HIC OMOP schema."""
22import datetime as dt
33from typing import Callable , Generator , Optional , Union , cast
4-
4+ from sqlsynthgen . utils import generate_time_series
55import numpy as np
66from mimesis import Generic
77import random
@@ -155,65 +155,84 @@ def gen_blood_pressure_events( # pylint: disable=too-many-arguments
155155 tables (measurements, observation, etc.).
156156 """
157157
158- def populate_blood_pressure_values (
158+
159+ def generate_paired_measurement (
159160 person_id : int ,
160161 visit_occurrence_id : int ,
161162 event_datetime : dt .datetime ,
163+ values : tuple [float , float ],
164+ measurement_concept_id : tuple [int ,int ],
165+ measurement_type_concept_ids : int ,
166+ unit_concept_id : int ,
167+ unit_source_value : str ,
162168 ) -> tuple [SqlRow , SqlRow ]:
163-
164- Systolic_blood_pressure_by_Noninvasive = 21492239
165- Diastolic_blood_pressure_by_Noninvasive = 21492240
166- measurement_type_concept_id = 32817 # EHR measurement
167- avg_systolic = 114.236842
168- avg_diastolic = 74.447368
169- avg_difference = avg_systolic - avg_diastolic
170- unit_concept_id = 8876 # mmHg
171-
172- gender = cast (int , person ["gender_concept_id" ])
173- if gender == 8507 :
174- systolic_value = random_normal (src_stats ["bp_profile" ][0 ]["average_under_60_systolic" ],src_stats ["bp_profile" ][0 ]["stddev_under_60_systolic" ])
175- diastolic_value = src_stats ["bp_profile" ][0 ]["average_systolic_diastolic_difference" ] + systolic_value
176- elif gender == 8532 :
177- systolic_value = random_normal (src_stats ["bp_profile" ][1 ]["average_under_60_systolic" ],src_stats ["bp_profile" ][1 ]["stddev_under_60_systolic" ])
178- diastolic_value = src_stats ["bp_profile" ][1 ]["average_systolic_diastolic_difference" ] + systolic_value
179- else :
180- systolic_value = avg_systolic
181- diastolic_value = avg_diastolic
182169
170+
171+ ### This can be abstracted to generate any number of sets of measurements
183172 """Generate two rows for the measurement table."""
184- systolic : SqlRow = {
185- "measurement_concept_id" : cast (int , Systolic_blood_pressure_by_Noninvasive ),
173+ measurement1 : SqlRow = {
174+ "measurement_concept_id" : cast (int , measurement_concept_id [ 0 ] ),
186175 "person_id" : person_id ,
187176 "visit_occurrence_id" : visit_occurrence_id ,
188177 "measurement_datetime" : event_datetime ,
189178 "measurement_date" : event_datetime .date (),
190- "measurement_type_concept_id" : measurement_type_concept_id ,
179+ "measurement_type_concept_id" : measurement_type_concept_ids ,
191180 "unit_concept_id" : unit_concept_id ,
192- "unit_source_value" : "mmHg" ,
193- "value_as_number" : systolic_value ,
181+ "unit_source_value" : unit_source_value ,
182+ "value_as_number" : values [ 0 ] ,
194183 }
195184
196- diastolic : SqlRow = {
197- "measurement_concept_id" : cast (int , Diastolic_blood_pressure_by_Noninvasive ),
185+ measurement2 : SqlRow = {
186+ "measurement_concept_id" : cast (int , measurement_concept_id [ 1 ] ),
198187 "person_id" : person_id ,
199188 "visit_occurrence_id" : visit_occurrence_id ,
200189 "measurement_datetime" : event_datetime ,
201190 "measurement_date" : event_datetime .date (),
202- "measurement_type_concept_id" : measurement_type_concept_id ,
191+ "measurement_type_concept_id" : measurement_type_concept_ids ,
203192 "unit_concept_id" : unit_concept_id ,
204- "unit_source_value" : "mmHg" ,
205- "value_as_number" : diastolic_value ,
193+ "unit_source_value" : unit_source_value ,
194+ "value_as_number" : values [ 1 ] ,
206195 }
207- return systolic , diastolic
196+ return measurement1 , measurement2
208197
209198 event_datetimes = random_event_times (avg_rate , visit_occurrence )
199+
200+ avg_systolic = 114.236842
201+ avg_diastolic = 74.447368
202+ sys_bp_non_invasive_concept_id = 21492239
203+ dias_bp_non_invasive_concept_id = 21492240
204+ measurement_type_concept_id = 32817 # EHR measurement
205+ unit_source_value = "mmHg"
206+ unit_concept_id = 8876 # mmHg
207+
208+ gender = cast (int , person ["gender_concept_id" ])
209+ if gender == 8507 :
210+ systolic_value = np .round (generate_time_series (len (event_datetimes ), 'iid' ,
211+ {'mean' : src_stats ["bp_profile" ][0 ]["average_under_60_systolic" ],
212+ 'std' : src_stats ["bp_profile" ][0 ]["stddev_under_60_systolic" ]},
213+ random_state = 42 ))
214+ diastolic_value = np .round (random_normal (src_stats ["bp_profile" ][0 ]["average_systolic_diastolic_difference" ],src_stats ["bp_profile" ][0 ]["average_systolic_diastolic_difference" ]* 0.1 ) + systolic_value )
215+ elif gender == 8532 :
216+ systolic_value = np .round (generate_time_series (len (event_datetimes ), 'iid' ,
217+ {'mean' : src_stats ["bp_profile" ][1 ]["average_under_60_systolic" ],
218+ 'std' : src_stats ["bp_profile" ][1 ]["stddev_under_60_systolic" ]},
219+ random_state = 42 ))
220+ diastolic_value = np .round (random_normal (src_stats ["bp_profile" ][1 ]["average_systolic_diastolic_difference" ],
221+ src_stats ["bp_profile" ][1 ][
222+ "average_systolic_diastolic_difference" ] * 0.1 ) + systolic_value )
223+ else :
224+ systolic_value = avg_systolic
225+ diastolic_value = avg_diastolic
226+
210227 events : list [tuple [str , SqlRow ]] = []
211- for event_datetime in sorted (event_datetimes ):
212- systolic , diastolic = populate_blood_pressure_values (cast (int , person ["person_id" ]),
228+ for index , event_datetime in enumerate ( sorted (event_datetimes ) ):
229+ systolic_dict , diastolic_dict = generate_paired_measurement (cast (int , person ["person_id" ]),
213230 cast (int , visit_occurrence ["visit_occurrence_id" ]),
214- event_datetime )
215- events .append (("measurement" , systolic ))
216- events .append (("measurement" , diastolic ))
231+ event_datetime ,(systolic_value [index ], diastolic_value [index ]),
232+ (sys_bp_non_invasive_concept_id ,dias_bp_non_invasive_concept_id ),
233+ measurement_type_concept_id ,unit_concept_id ,unit_source_value )
234+ events .append (("measurement" , systolic_dict )),
235+ events .append (("measurement" , diastolic_dict ))
217236 return events
218237
219238def generate (
@@ -240,11 +259,15 @@ def generate(
240259 death_row = (yield death ) if death else None
241260 visit_occurrence = yield gen_visit_occurrence (person , death_row , src_stats )
242261
262+ # abs to avoid negative rates due to random normal variation
243263 # abs to avoid negative rates due to random normal variation
244264 avg_rate = abs (random_normal (
245265 src_stats ["avg_measurements_per_visit_hour" ][0 ]['avg_measurements_per_hour' ],
246- src_stats ["avg_measurements_per_visit_hour" ][0 ]['stddev_measurements_per_hour' ] ))
266+ src_stats ["avg_measurements_per_visit_hour" ][0 ]['stddev_measurements_per_hour' ] )
267+ )
268+
247269
270+ print (f"Generating blood pressure events at an average rate of { avg_rate } per hour." )
248271 for event in gen_blood_pressure_events (
249272 avg_rate ,
250273 visit_occurrence ,
0 commit comments