Skip to content

Commit cd8f26a

Browse files
committed
adding time series to blood pressure generator #185
1 parent 417519d commit cd8f26a

File tree

1 file changed

+61
-38
lines changed

1 file changed

+61
-38
lines changed

person_story.py

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Story generators for the CC HIC OMOP schema."""
22
import datetime as dt
33
from typing import Callable, Generator, Optional, Union, cast
4-
4+
from sqlsynthgen.utils import generate_time_series
55
import numpy as np
66
from mimesis import Generic
77
import random
@@ -155,65 +155,84 @@ def gen_blood_pressure_events( # pylint: disable=too-many-arguments
155155
tables (measurements, observation, etc.).
156156
"""
157157

158-
def populate_blood_pressure_values(
158+
159+
def generate_paired_measurement(
159160
person_id: int,
160161
visit_occurrence_id: int,
161162
event_datetime: dt.datetime,
163+
values: tuple[float, float],
164+
measurement_concept_id: tuple[int,int],
165+
measurement_type_concept_ids: int,
166+
unit_concept_id: int,
167+
unit_source_value: str,
162168
) -> tuple[SqlRow, SqlRow]:
163-
164-
Systolic_blood_pressure_by_Noninvasive = 21492239
165-
Diastolic_blood_pressure_by_Noninvasive = 21492240
166-
measurement_type_concept_id = 32817 # EHR measurement
167-
avg_systolic = 114.236842
168-
avg_diastolic = 74.447368
169-
avg_difference = avg_systolic - avg_diastolic
170-
unit_concept_id = 8876 # mmHg
171-
172-
gender = cast(int, person["gender_concept_id"])
173-
if gender == 8507:
174-
systolic_value = random_normal(src_stats["bp_profile"][0]["average_under_60_systolic"],src_stats["bp_profile"][0]["stddev_under_60_systolic"])
175-
diastolic_value = src_stats["bp_profile"][0]["average_systolic_diastolic_difference"] + systolic_value
176-
elif gender == 8532:
177-
systolic_value = random_normal(src_stats["bp_profile"][1]["average_under_60_systolic"],src_stats["bp_profile"][1]["stddev_under_60_systolic"])
178-
diastolic_value = src_stats["bp_profile"][1]["average_systolic_diastolic_difference"] + systolic_value
179-
else:
180-
systolic_value = avg_systolic
181-
diastolic_value = avg_diastolic
182169

170+
171+
### This can be abstracted to generate any number of sets of measurements
183172
"""Generate two rows for the measurement table."""
184-
systolic: SqlRow = {
185-
"measurement_concept_id": cast(int, Systolic_blood_pressure_by_Noninvasive),
173+
measurement1: SqlRow = {
174+
"measurement_concept_id": cast(int, measurement_concept_id[0]),
186175
"person_id": person_id,
187176
"visit_occurrence_id": visit_occurrence_id,
188177
"measurement_datetime": event_datetime,
189178
"measurement_date": event_datetime.date(),
190-
"measurement_type_concept_id": measurement_type_concept_id,
179+
"measurement_type_concept_id": measurement_type_concept_ids,
191180
"unit_concept_id": unit_concept_id,
192-
"unit_source_value": "mmHg",
193-
"value_as_number": systolic_value,
181+
"unit_source_value": unit_source_value,
182+
"value_as_number": values[0],
194183
}
195184

196-
diastolic: SqlRow = {
197-
"measurement_concept_id": cast(int, Diastolic_blood_pressure_by_Noninvasive),
185+
measurement2: SqlRow = {
186+
"measurement_concept_id": cast(int, measurement_concept_id[1]),
198187
"person_id": person_id,
199188
"visit_occurrence_id": visit_occurrence_id,
200189
"measurement_datetime": event_datetime,
201190
"measurement_date": event_datetime.date(),
202-
"measurement_type_concept_id": measurement_type_concept_id,
191+
"measurement_type_concept_id": measurement_type_concept_ids,
203192
"unit_concept_id": unit_concept_id,
204-
"unit_source_value": "mmHg",
205-
"value_as_number": diastolic_value,
193+
"unit_source_value": unit_source_value,
194+
"value_as_number": values[1],
206195
}
207-
return systolic, diastolic
196+
return measurement1, measurement2
208197

209198
event_datetimes = random_event_times(avg_rate, visit_occurrence)
199+
200+
avg_systolic = 114.236842
201+
avg_diastolic = 74.447368
202+
sys_bp_non_invasive_concept_id = 21492239
203+
dias_bp_non_invasive_concept_id = 21492240
204+
measurement_type_concept_id = 32817 # EHR measurement
205+
unit_source_value = "mmHg"
206+
unit_concept_id = 8876 # mmHg
207+
208+
gender = cast(int, person["gender_concept_id"])
209+
if gender == 8507:
210+
systolic_value = np.round(generate_time_series(len(event_datetimes), 'iid',
211+
{'mean': src_stats["bp_profile"][0]["average_under_60_systolic"],
212+
'std': src_stats["bp_profile"][0]["stddev_under_60_systolic"]},
213+
random_state=42))
214+
diastolic_value = np.round(random_normal(src_stats["bp_profile"][0]["average_systolic_diastolic_difference"],src_stats["bp_profile"][0]["average_systolic_diastolic_difference"]*0.1) + systolic_value)
215+
elif gender == 8532:
216+
systolic_value = np.round(generate_time_series(len(event_datetimes), 'iid',
217+
{'mean': src_stats["bp_profile"][1]["average_under_60_systolic"],
218+
'std': src_stats["bp_profile"][1]["stddev_under_60_systolic"]},
219+
random_state=42))
220+
diastolic_value = np.round(random_normal(src_stats["bp_profile"][1]["average_systolic_diastolic_difference"],
221+
src_stats["bp_profile"][1][
222+
"average_systolic_diastolic_difference"] * 0.1) + systolic_value)
223+
else:
224+
systolic_value = avg_systolic
225+
diastolic_value = avg_diastolic
226+
210227
events: list[tuple[str, SqlRow]] = []
211-
for event_datetime in sorted(event_datetimes):
212-
systolic, diastolic = populate_blood_pressure_values(cast(int, person["person_id"]),
228+
for index, event_datetime in enumerate(sorted(event_datetimes)):
229+
systolic_dict, diastolic_dict = generate_paired_measurement(cast(int, person["person_id"]),
213230
cast(int, visit_occurrence["visit_occurrence_id"]),
214-
event_datetime)
215-
events.append(("measurement", systolic))
216-
events.append(("measurement", diastolic))
231+
event_datetime,(systolic_value[index], diastolic_value[index]),
232+
(sys_bp_non_invasive_concept_id,dias_bp_non_invasive_concept_id),
233+
measurement_type_concept_id,unit_concept_id,unit_source_value)
234+
events.append(("measurement", systolic_dict)),
235+
events.append(("measurement", diastolic_dict))
217236
return events
218237

219238
def generate(
@@ -240,11 +259,15 @@ def generate(
240259
death_row = (yield death) if death else None
241260
visit_occurrence = yield gen_visit_occurrence(person, death_row, src_stats)
242261

262+
# abs to avoid negative rates due to random normal variation
243263
# abs to avoid negative rates due to random normal variation
244264
avg_rate = abs(random_normal(
245265
src_stats["avg_measurements_per_visit_hour"][0]['avg_measurements_per_hour'],
246-
src_stats["avg_measurements_per_visit_hour"][0]['stddev_measurements_per_hour'] ))
266+
src_stats["avg_measurements_per_visit_hour"][0]['stddev_measurements_per_hour'] )
267+
)
268+
247269

270+
print(f"Generating blood pressure events at an average rate of {avg_rate} per hour.")
248271
for event in gen_blood_pressure_events(
249272
avg_rate,
250273
visit_occurrence,

0 commit comments

Comments
 (0)