@@ -38,6 +38,32 @@ def load_ofac_names():
38
38
# Load OFAC names at startup
39
39
OFAC_INDIVIDUALS , OFAC_COMPANIES = load_ofac_names ()
40
40
41
# OFAC names already handed out, kept separately for individuals and
# companies so the same name is never assigned twice. Both start empty and
# must remain two distinct set objects.
USED_OFAC_INDIVIDUALS, USED_OFAC_COMPANIES = set(), set()
44
+
45
def reset_ofac_tracking():
    """Forget every OFAC name handed out so far.

    Empties both module-level tracking sets (``USED_OFAC_INDIVIDUALS`` and
    ``USED_OFAC_COMPANIES``) so that a new generation run starts with no
    names marked as used.
    """
    # Clear in place rather than rebinding, so the module-level names keep
    # pointing at the same set objects.
    for used_names in (USED_OFAC_INDIVIDUALS, USED_OFAC_COMPANIES):
        used_names.clear()
50
+
51
def get_unique_ofac_name(is_company=False):
    """Pick a random OFAC name that has not been handed out yet.

    Args:
        is_company: When truthy, draw from ``OFAC_COMPANIES`` and track the
            result in ``USED_OFAC_COMPANIES``; otherwise draw from
            ``OFAC_INDIVIDUALS`` and track it in ``USED_OFAC_INDIVIDUALS``.

    Returns:
        The chosen name, which is recorded as used before returning, or
        ``None`` when every name of the requested kind is already used (or
        the source list is empty).
    """
    # Select the source list and its matching "already used" set once, so a
    # single code path serves both entity kinds.
    if is_company:
        source_names, taken = OFAC_COMPANIES, USED_OFAC_COMPANIES
    else:
        source_names, taken = OFAC_INDIVIDUALS, USED_OFAC_INDIVIDUALS

    candidates = [entry for entry in source_names if entry not in taken]
    if not candidates:
        return None  # No unique names available

    picked = random.choice(candidates)
    taken.add(picked)
    return picked
66
+
41
67
# Constants
42
68
CURRENT_DATE = datetime .today ()
43
69
COUNTRIES = ["USA" , "Canada" , "UK" , "Germany" , "South Africa" , "Cuba" , "Iran" , "Russia" ]
@@ -210,14 +236,13 @@ def generate_customers(num_customers=50):
210
236
211
237
if will_be_sanctioned :
212
238
# Use real OFAC names for customers that will appear in sanctions
213
- if is_company and OFAC_COMPANIES :
214
- name = random .choice (OFAC_COMPANIES )
215
- print (f"OFAC Company - { name } " )
216
- elif not is_company and OFAC_INDIVIDUALS :
217
- name = random .choice (OFAC_INDIVIDUALS )
218
- print (f"OFAC Individual - { name } " )
239
+ ofac_name = get_unique_ofac_name (is_company )
240
+ if ofac_name :
241
+ name = ofac_name
242
+ print (f"OFAC { 'Company' if is_company else 'Individual' } - { name } " )
219
243
else :
220
244
name = fake .company () if is_company else fake .name () # Fallback to generated name
245
+ print (f"Warning: No unique OFAC { 'company' if is_company else 'individual' } names available, using fake name" )
221
246
222
247
# Add to sanctioned entities list
223
248
sanctioned_entities .append ({
@@ -588,12 +613,11 @@ def generate_sanctions(sanctioned_entities, num_additional=50):
588
613
is_company = random .random () < 0.4 # 40% companies in sanctions
589
614
590
615
# Use real OFAC names when available
591
- if is_company and OFAC_COMPANIES :
592
- name = random .choice (OFAC_COMPANIES )
593
- elif not is_company and OFAC_INDIVIDUALS :
594
- name = random .choice (OFAC_INDIVIDUALS )
616
+ ofac_name = get_unique_ofac_name (is_company )
617
+ if ofac_name :
618
+ name = ofac_name
595
619
else :
596
- # Fallback to fake names if OFAC list not available
620
+ # Fallback to fake names if no unique OFAC names available
597
621
name = fake .company () if is_company else fake .name ()
598
622
599
623
# Sanctions more likely from high-risk countries
@@ -637,6 +661,7 @@ def generate_sanctions(sanctioned_entities, num_additional=50):
637
661
638
662
# Main execution
639
663
print ("Starting AML data generation..." )
664
+ reset_ofac_tracking () # Reset OFAC name tracking for fresh generation
640
665
print ("Generating 5,000 customers and accounts..." )
641
666
customers , accounts , sanctioned_entities = generate_customers (5000 )
642
667
print (f"Generated { len (customers )} customers, { len (accounts )} accounts, { len (sanctioned_entities )} potential sanctioned entities" )
0 commit comments