Skip to content

Commit 5b81f51

Browse files
authored
Merge pull request #49 from johnseekins/facility_groups
Vera.org Facility data and additional facility types/groupings
2 parents f1e6818 + 88fbd69 commit 5b81f51

File tree

13 files changed

+592
-81
lines changed

13 files changed

+592
-81
lines changed

default_data.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
"https://www.ice.gov/detention-facilities?page=0&exposed_form_display=1",
7171
"https://www.ice.gov/detain/detention-facilities/baker-county-facility",
7272
],
73+
"vera_id": "",
7374
"wikidata": {"page_url": "", "search_query": ""},
7475
"wikipedia": {"page_url": "", "search_query": ""},
7576
},
@@ -131,6 +132,7 @@
131132
"total": 1.916666666666666,
132133
},
133134
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
135+
"vera_id": "",
134136
"wikidata": {"page_url": "", "search_query": ""},
135137
"wikipedia": {"page_url": "", "search_query": ""},
136138
},
@@ -200,6 +202,7 @@
200202
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
201203
"https://www.ice.gov/detain/detention-facilities/northwest-ice-processing-center-nwipc",
202204
],
205+
"vera_id": "",
203206
"wikidata": {"page_url": "", "search_query": ""},
204207
"wikipedia": {"page_url": "", "search_query": ""},
205208
},
@@ -261,6 +264,7 @@
261264
"total": 2.4642857142857095,
262265
},
263266
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
267+
"vera_id": "",
264268
"wikidata": {"page_url": "", "search_query": ""},
265269
"wikipedia": {"page_url": "", "search_query": ""},
266270
},
@@ -326,6 +330,7 @@
326330
"total": 5.038690476190489,
327331
},
328332
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
333+
"vera_id": "",
329334
"wikidata": {"page_url": "", "search_query": ""},
330335
"wikipedia": {"page_url": "", "search_query": ""},
331336
},
@@ -395,6 +400,7 @@
395400
"https://www.ice.gov/detention-facilities?page=3&exposed_form_display=1",
396401
"https://www.ice.gov/detain/detention-facilities/grayson-county-detention-center",
397402
],
403+
"vera_id": "",
398404
"wikidata": {"page_url": "", "search_query": ""},
399405
"wikipedia": {"page_url": "", "search_query": ""},
400406
},
@@ -460,6 +466,7 @@
460466
"total": 16.732142857143007,
461467
},
462468
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
469+
"vera_id": "",
463470
"wikidata": {"page_url": "", "search_query": ""},
464471
"wikipedia": {"page_url": "", "search_query": ""},
465472
},
@@ -525,6 +532,7 @@
525532
"total": 20.55952380952385,
526533
},
527534
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
535+
"vera_id": "",
528536
"wikidata": {"page_url": "", "search_query": ""},
529537
"wikipedia": {"page_url": "", "search_query": ""},
530538
},
@@ -594,6 +602,7 @@
594602
"https://www.ice.gov/detention-facilities?page=5&exposed_form_display=1",
595603
"https://www.ice.gov/detain/detention-facilities/san-luis-regional-detention-center",
596604
],
605+
"vera_id": "",
597606
"wikidata": {"page_url": "", "search_query": ""},
598607
"wikipedia": {"page_url": "", "search_query": ""},
599608
},
@@ -608,7 +617,7 @@
608617
},
609618
"address_str": "409 FM 1144,KARNES CITY,TX,78118",
610619
"facility_type": {
611-
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees \u2013 typically these are operated by private contractors pursuant to their agreements with local governments.",
620+
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees. Typically these are operated by private contractors pursuant to their agreements with local governments.",
612621
"expanded_name": "Dedicated Intergovernmental Service Agreement",
613622
"id": "DIGSA",
614623
},
@@ -663,6 +672,7 @@
663672
"https://www.ice.gov/detention-facilities?page=3&exposed_form_display=1",
664673
"https://www.ice.gov/detain/detention-facilities/karnes-county-ipc",
665674
],
675+
"vera_id": "",
666676
"wikidata": {"page_url": "", "search_query": ""},
667677
"wikipedia": {"page_url": "", "search_query": ""},
668678
},
@@ -728,6 +738,7 @@
728738
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
729739
"https://www.ice.gov/detain/detention-facilities/delaney-hall-detention-facility",
730740
],
741+
"vera_id": "",
731742
"wikidata": {"page_url": "", "search_query": ""},
732743
"wikipedia": {"page_url": "", "search_query": ""},
733744
},
@@ -793,6 +804,7 @@
793804
"total": 28.62202380952395,
794805
},
795806
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
807+
"vera_id": "",
796808
"wikidata": {"page_url": "", "search_query": ""},
797809
"wikipedia": {"page_url": "", "search_query": ""},
798810
},
@@ -862,6 +874,7 @@
862874
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
863875
"https://www.ice.gov/detain/detention-facilities/moshannon-valley-processing-center",
864876
],
877+
"vera_id": "",
865878
"wikidata": {"page_url": "", "search_query": ""},
866879
"wikipedia": {"page_url": "", "search_query": ""},
867880
},
@@ -919,6 +932,7 @@
919932
"total": 13.041666666666726,
920933
},
921934
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
935+
"vera_id": "",
922936
"wikidata": {"page_url": "", "search_query": ""},
923937
"wikipedia": {"page_url": "", "search_query": ""},
924938
},
@@ -984,6 +998,7 @@
984998
"https://www.ice.gov/detention-facilities?page=0&exposed_form_display=1",
985999
"https://www.ice.gov/detain/detention-facilities/butler-county-sheriffs-office",
9861000
],
1001+
"vera_id": "",
9871002
"wikidata": {"page_url": "", "search_query": ""},
9881003
"wikipedia": {"page_url": "", "search_query": ""},
9891004
},
@@ -1053,6 +1068,7 @@
10531068
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
10541069
"https://www.ice.gov/detain/detention-facilities/phelps-county-jail",
10551070
],
1071+
"vera_id": "",
10561072
"wikidata": {"page_url": "", "search_query": ""},
10571073
"wikipedia": {"page_url": "", "search_query": ""},
10581074
},
@@ -1122,6 +1138,7 @@
11221138
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
11231139
"https://www.ice.gov/detain/detention-facilities/laredo-detention-center",
11241140
],
1141+
"vera_id": "",
11251142
"wikidata": {"page_url": "", "search_query": ""},
11261143
"wikipedia": {"page_url": "", "search_query": ""},
11271144
},
@@ -1187,6 +1204,7 @@
11871204
"https://www.ice.gov/detention-facilities?page=1&exposed_form_display=1",
11881205
"https://www.ice.gov/detain/detention-facilities/fort-bliss-detention-facility",
11891206
],
1207+
"vera_id": "",
11901208
"wikidata": {"page_url": "", "search_query": ""},
11911209
"wikipedia": {"page_url": "", "search_query": ""},
11921210
},
@@ -1235,6 +1253,7 @@
12351253
"https://www.ice.gov/detention-facilities?page=4&exposed_form_display=1",
12361254
"https://www.ice.gov/detain/detention-facilities/naval-station-guantanamo-bay",
12371255
],
1256+
"vera_id": "",
12381257
"wikidata": {"page_url": "", "search_query": ""},
12391258
"wikipedia": {"page_url": "", "search_query": ""},
12401259
},
@@ -1296,6 +1315,7 @@
12961315
"total": 1.5208333333333308,
12971316
},
12981317
"source_urls": ["https://www.ice.gov/doclib/detention/FY25_detentionStats09112025.xlsx"],
1318+
"vera_id": "",
12991319
"wikidata": {"page_url": "", "search_query": ""},
13001320
"wikipedia": {"page_url": "", "search_query": ""},
13011321
},

enrichers/general.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,33 @@ def _enrich_facility(facility_data: tuple) -> tuple:
3636
"""enrich a single facility"""
3737
facility_id, facility = facility_data
3838
facility_name = facility["name"]
39+
if len(facility["source_urls"]) == 1 and "vera-institute/ice-detention-trends" in facility["source_urls"][0]:
40+
logger.debug(" Skipping enrichment of facility with only vera.org data: %s", facility["name"])
41+
return facility_id, facility
3942
logger.info("Enriching facility %s...", facility_name)
4043
enriched_facility = copy.deepcopy(facility)
4144

4245
wiki_res = wikipedia.Wikipedia(facility_name=facility_name).search()
4346
wd_res = wikidata.Wikidata(facility_name=facility_name).search()
4447
osm = openstreetmap.OpenStreetMap(facility_name=facility_name, address=facility.get("address", {}))
4548
osm_res = osm.search()
46-
enriched_facility["wikipedia"]["page_url"] = wiki_res.get("url", "")
49+
url = wiki_res.get("url", None)
50+
if url:
51+
enriched_facility["wikipedia"]["page_url"] = url
4752
enriched_facility["wikipedia"]["search_query"] = wiki_res.get("search_query_steps", "")
48-
enriched_facility["wikidata"]["page_url"] = wd_res.get("url", "")
53+
url = wd_res.get("url", None)
54+
if url:
55+
enriched_facility["wikidata"]["page_url"] = url
4956
enriched_facility["wikidata"]["search_query"] = wd_res.get("search_query_steps", "")
50-
enriched_facility["osm"]["latitude"] = osm_res.get("details", {}).get("latitude", osm.default_coords["latitude"])
51-
enriched_facility["osm"]["longitude"] = osm_res.get("details", {}).get("longitude", osm.default_coords["longitude"])
52-
enriched_facility["osm"]["url"] = osm_res.get("url", "")
57+
lat = osm_res.get("details", {}).get("latitude", None)
58+
long = osm_res.get("details", {}).get("longitude", None)
59+
if lat:
60+
enriched_facility["osm"]["latitude"] = lat
61+
if long:
62+
enriched_facility["osm"]["longitude"] = long
63+
url = osm_res.get("url", None)
64+
if url:
65+
enriched_facility["osm"]["url"] = url
5366
enriched_facility["osm"]["search_query"] = osm_res.get("search_query_steps", "")
5467

5568
logger.debug(enriched_facility)

file_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ def export_to_file(
2323
match file_type:
2424
case "xlsx":
2525
with xlsxwriter.Workbook(full_name, {"remove_timezone": True}) as wb:
26-
writer.write_excel(workbook=wb, include_header=True, autofit=True)
26+
_ = writer.write_excel(workbook=wb, include_header=True, autofit=True)
2727
case "csv":
2828
with open(full_name, "w", newline="", encoding="utf-8") as f_out:
2929
writer.write_csv(file=f_out, include_header=True)
3030
case "parquet":
3131
writer.write_parquet(full_name, use_pyarrow=True)
32+
case _:
33+
logger.warning("Invalid dataframe output type %s", file_type)
3234
elif file_type == "json":
3335
with open(full_name, "w", encoding="utf-8") as f_out:
3436
json.dump(facilities_data, f_out, indent=2, sort_keys=True, default=str)
@@ -103,7 +105,7 @@ def print_summary(facilities_data: dict) -> None:
103105
false_positives = 0
104106
errors = 0
105107
for facility in facilities_data["facilities"].values():
106-
query = facility.get("wikipedia", {}).get("search_query", "")
108+
query: str = facility.get("wikipedia", {}).get("search_query", "")
107109
if "REJECTED" in query:
108110
false_positives += 1
109111
elif "ERROR" in query:

ice_scrapers/__init__.py

Lines changed: 45 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,63 +4,72 @@
44
may call them
55
"""
66

7-
# extracted ADP sheet header list 2025-09-07
8-
facility_sheet_header = [
9-
"Name",
10-
"Address",
11-
"City",
12-
"State",
13-
"Zip",
14-
"AOR",
15-
"Type Detailed",
16-
"Male/Female",
17-
"FY25 ALOS",
18-
"Level A",
19-
"Level B",
20-
"Level C",
21-
"Level D",
22-
"Male Crim",
23-
"Male Non-Crim",
24-
"Female Crim",
25-
"Female Non-Crim",
26-
"ICE Threat Level 1",
27-
"ICE Threat Level 2",
28-
"ICE Threat Level 3",
29-
"No ICE Threat Level",
30-
"Mandatory",
31-
"Guaranteed Minimum",
32-
"Last Inspection Type",
33-
"Last Inspection End Date",
34-
"Pending FY25 Inspection",
35-
"Last Inspection Standard",
36-
"Last Final Rating",
37-
]
38-
397
ice_inspection_types = {
408
# found in https://www.ice.gov/foia/odo-facility-inspections
419
"ODO": "Office of Detention Oversight",
4210
# found in https://ia803100.us.archive.org/16/items/6213032-ORSA-MOU-ICE/6213032-ORSA-MOU-ICE_text.pdf
4311
"ORSA": "Operational Review Self-Assessment",
4412
}
4513

14+
# extracted from https://vera-institute.files.svdcdn.com/production/downloads/dashboard_appendix.pdf 2025-09-23
15+
ice_facility_group_mapping = {
16+
"Non-Dedicated": ["IGSA"],
17+
"Dedicated": ["DIGSA", "CDF", "SPC"],
18+
"Federal": ["BOF", "USMSIGA", "USMS IGA", "USMS CDF", "DOD", "MOC"],
19+
"Hold/Staging": ["Hold", "Staging"],
20+
"Family/Youth": ["Family", "Juvenile", "FAMILY"],
21+
"Medical": ["Hospital"],
22+
"Hotel": ["Hotel"],
23+
"Other/Unknown": ["Other", "Unknown", "Pending"],
24+
}
25+
4626
# extracted from https://www.ice.gov/doclib/detention/FY25_detentionStats08292025.xlsx 2025-09-07
27+
# and https://vera-institute.files.svdcdn.com/production/downloads/dashboard_appendix.pdf 2025-09-23
4728
ice_facility_types = {
4829
"BOP": {
4930
"expanded_name": "Federal Bureau of Prisons",
5031
"description": "A facility operated by the Federal Bureau of Prisons",
5132
},
33+
"CDF": {
34+
"expanded_name": "Contract Detention Facility",
35+
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
36+
},
5237
"DIGSA": {
5338
"expanded_name": "Dedicated Intergovernmental Service Agreement",
5439
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts to use all bed space via a Dedicated Intergovernmental Service Agreement; or facilities used by ICE pursuant to Intergovernmental Service Agreements, which house only ICE detainees – typically these are operated by private contractors pursuant to their agreements with local governments.",
5540
},
5641
"DOD": {
57-
"expanded_name": "Department of Defense",
58-
"description": "Military facility",
42+
"expanded_name": "Department of Defense",
43+
"description": "Department of Defense facilities - Often Army bases",
44+
},
45+
"FAMILY": {
46+
"expanded_name": "Family",
47+
"description": "A facility in which families are able to remain together while awaiting their proceedings",
48+
},
49+
"Family": {
50+
"expanded_name": "Family",
51+
"description": "A facility in which families are able to remain together while awaiting their proceedings",
52+
},
53+
"Hospital": {
54+
"expanded_name": "Hospital",
55+
"description": "A medical facility",
5956
},
6057
"IGSA": {
6158
"expanded_name": "Intergovernmental Service Agreement",
6259
"description": "A publicly-owned facility operated by state/local government(s), or private contractors, in which ICE contracts for bed space via an Intergovernmental Service Agreement; or local jails used by ICE pursuant to Intergovernmental Service Agreements, which house both ICE and non-ICE detainees, typically county prisoners awaiting trial or serving short sentences, but sometimes also USMS prisoners.",
6360
},
61+
"Juvenile": {
62+
"expanded_name": "Juvenile",
63+
"description": "An IGSA facility capable of housing juveniles (separate from adults) for a temporary period of time",
64+
},
65+
"Other": {
66+
"expanded_name": "Other",
67+
"description": "Facilities including but not limited to transportation-related facilities, hotels, and/or other facilities",
68+
},
69+
"Unknown": {
70+
"expanded_name": "Unknown",
71+
"description": "A facility whose type could not be identified",
72+
},
6473
"SPC": {
6574
"expanded_name": "Service Processing Center",
6675
"description": "A facility owned by the government and staffed by a combination of federal and contract employees.",
@@ -82,10 +91,6 @@
8291
"expanded_name": "United States Marshals Service Contract Detention Facility",
8392
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
8493
},
85-
"CDF": {
86-
"expanded_name": "Contract Detention Facility",
87-
"description": "Name derived from listing at https://www.vera.org/ice-detention-trends",
88-
},
8994
"Staging": {
9095
"description": "Some facilities in the ICE spreadsheet are marked 'Staging'. Hard to determine why.",
9196
"expanded_name": "Staging",
@@ -129,6 +134,7 @@
129134
from .utils import ( # noqa: E402
130135
get_ice_scrape_pages, # noqa: F401
131136
repair_locality, # noqa: F401
137+
repair_name, # noqa: F401
132138
repair_street, # noqa: F401
133139
repair_zip, # noqa: F401
134140
special_facilities, # noqa: F401
@@ -140,5 +146,6 @@
140146
merge_field_offices, # noqa: F401
141147
scrape_field_offices, # noqa: F401
142148
)
149+
from .vera_data import collect_vera_facility_data # noqa: F401,E402
143150
from .custom_facilities import insert_additional_facilities # noqa: F401,E402
144151
from .general import facilities_scrape_wrapper # noqa: F401,E402

ice_scrapers/custom_facilities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
Handle manually discovered/managed facilities
55
"""
6-
custom_facilities = {
6+
custom_facilities: dict = {
77
"2309 North Highway 83,McCook,NE,69001": {
88
"_repaired_record": False,
99
"address": {

0 commit comments

Comments
 (0)