10
10
Faker .seed (42 )
11
11
random .seed (42 )
12
12
13
+ # Load real OFAC names from split files
14
def _read_ofac_name_file(path, label):
    """Return the stripped, non-blank lines of *path*, or [] if it is missing.

    Args:
        path: Text file containing one name per line.
        label: Plural noun used in the progress message (e.g. "individuals").

    Returns:
        A list of names in file order; empty when the file does not exist.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 and also discards a leading byte-order
        # mark. With plain 'utf-8' the BOM survives as '\ufeff' glued to the
        # first name, because str.strip() does not treat it as whitespace.
        with open(path, 'r', encoding='utf-8-sig') as f:
            stripped_lines = (line.strip() for line in f)
            names = [entry for entry in stripped_lines if entry]
    except FileNotFoundError:
        print(f"Warning: {path} not found.")
        return []

    print(f"Loaded {len(names)} OFAC {label}")
    return names


def load_ofac_names():
    """Load real OFAC sanctioned names from the split files.

    Reads ``ofac_individuals.txt`` and ``ofac_companies.txt`` from the current
    working directory. A missing file is reported with a warning and yields an
    empty list instead of raising.

    Returns:
        A ``(individuals, companies)`` tuple of lists of names. Either list
        may be empty.
    """
    individuals = _read_ofac_name_file('ofac_individuals.txt', 'individuals')
    companies = _read_ofac_name_file('ofac_companies.txt', 'companies')

    if not individuals and not companies:
        print("Warning: No OFAC files found. Using fallback fake names.")

    return individuals, companies
37
+
38
+ # Load OFAC names at startup
39
+ OFAC_INDIVIDUALS , OFAC_COMPANIES = load_ofac_names ()
40
+
13
41
# Constants
14
42
CURRENT_DATE = datetime .today ()
15
43
COUNTRIES = ["USA" , "Canada" , "UK" , "Germany" , "South Africa" , "Cuba" , "Iran" , "Russia" ]
@@ -128,7 +156,6 @@ def generate_customers(num_customers=50):
128
156
for i in range (num_customers ):
129
157
account_id = 10000 + i
130
158
is_company = random .random () < 0.2 # 20% companies
131
- name = fake .company () if is_company else fake .name ()
132
159
133
160
# More realistic age distribution
134
161
if is_company :
@@ -178,18 +205,34 @@ def generate_customers(num_customers=50):
178
205
179
206
address = fake .address ().replace ("\n " , ", " )
180
207
181
- # Store potential sanctioned entities (some customers will appear in sanctions)
182
- if blacklisted or (nationality in HIGH_RISK_COUNTRIES and random .random () < 0.3 ):
208
+ # Determine if this customer should be sanctioned and use OFAC name if so
209
+ will_be_sanctioned = blacklisted or (nationality in HIGH_RISK_COUNTRIES and random .random () < 0.3 )
210
+
211
+ if will_be_sanctioned :
212
+ # Use real OFAC names for customers that will appear in sanctions
213
+ if is_company and OFAC_COMPANIES :
214
+ name = random .choice (OFAC_COMPANIES )
215
+ print (f"OFAC Company - { name } " )
216
+ elif not is_company and OFAC_INDIVIDUALS :
217
+ name = random .choice (OFAC_INDIVIDUALS )
218
+ print (f"OFAC Individual - { name } " )
219
+ else :
220
+ name = fake .company () if is_company else fake .name () # Fallback to generated name
221
+
222
+ # Add to sanctioned entities list
183
223
sanctioned_entities .append ({
184
- "name" : name ,
224
+ "name" : name , # Same name as customer record
185
225
"address" : address ,
186
226
"entity_type" : "company" if is_company else "individual" ,
187
227
"nationality" : nationality
188
228
})
229
+ else :
230
+ # Normal customer - use fake generated name
231
+ name = fake .company () if is_company else fake .name ()
189
232
190
233
customers .append ({
191
234
"customer_id" : i + 1 ,
192
- "name" : name ,
235
+ "name" : name , # Now uses OFAC name if customer will be sanctioned
193
236
"account" : account_id ,
194
237
"dob" : dob .strftime ("%Y-%m-%d" ),
195
238
"nationality" : nationality ,
@@ -202,7 +245,7 @@ def generate_customers(num_customers=50):
202
245
# MongoDB accounts - exactly same data but different structure
203
246
accounts .append ({
204
247
"account_id" : account_id ,
205
- "name" : name ,
248
+ "name" : name , # Same name as customer record
206
249
"entity_type" : "company" if is_company else "individual" ,
207
250
"contact_information" : {
208
251
"address" : address ,
@@ -526,7 +569,8 @@ def generate_sars(customers, transactions):
526
569
def generate_sanctions (sanctioned_entities , num_additional = 50 ):
527
570
sanctions = []
528
571
529
- # First, add some of our actual customers to sanctions (creates real relationships)
572
+ # TODO
573
+ # Consider removing the limit of 20 here to get all sanctioned entities in future
530
574
for i , entity in enumerate (sanctioned_entities [:min (20 , len (sanctioned_entities ))]):
531
575
# Use actual customer data for realistic sanctions entries
532
576
sanctions .append ({
@@ -539,10 +583,18 @@ def generate_sanctions(sanctioned_entities, num_additional=50):
539
583
"list_type" : random .choices (["OFAC" , "UN" , "EU" ], weights = [50 , 30 , 20 ], k = 1 )[0 ]
540
584
})
541
585
542
- # Then add additional fictional sanctioned entities
586
+ # Add additional real OFAC sanctioned entities
543
587
for i in range (num_additional ):
544
588
is_company = random .random () < 0.4 # 40% companies in sanctions
545
- name = fake .company () if is_company else fake .name ()
589
+
590
+ # Use real OFAC names when available
591
+ if is_company and OFAC_COMPANIES :
592
+ name = random .choice (OFAC_COMPANIES )
593
+ elif not is_company and OFAC_INDIVIDUALS :
594
+ name = random .choice (OFAC_INDIVIDUALS )
595
+ else :
596
+ # Fallback to fake names if OFAC list not available
597
+ name = fake .company () if is_company else fake .name ()
546
598
547
599
# Sanctions more likely from high-risk countries
548
600
country = random .choices (HIGH_RISK_COUNTRIES + ["North Korea" , "Syria" , "Venezuela" ],
@@ -627,4 +679,4 @@ def generate_sanctions(sanctioned_entities, num_additional=50):
627
679
print (f" - { sum (1 for t in pg_transactions if t ['is_laundering' ]):,} suspicious transactions" )
628
680
print (f" - { len (sars ):,} SARs generated" )
629
681
print (f" - { len (sanctions ):,} sanctions entries" )
630
- print (f" - Files saved to postgres/ and mongo_seed/ directories" )
682
+ print (f" - Files saved to postgres/ and mongo_seed/ directories" )
0 commit comments