Skip to content

Commit 02d307b

Browse files
committed
handle feeds with 1 agency & routes.agency_id = null 🐛✅
fixes #45
1 parent 9ec03e3 commit 02d307b

File tree

14 files changed

+87
-10
lines changed

14 files changed

+87
-10
lines changed

cli.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ pipeline(
165165
if (!err) return;
166166
if (err instanceof DataError) {
167167
console.error(String(err))
168-
return;
168+
} else if (err.code !== 'EPIPE') {
169+
console.error(err)
169170
}
170-
if (err.code !== 'EPIPE') console.error(err)
171171
process.exit(1)
172172
}
173173
)

lib/data-error.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ class DataError extends Error {
55
super(`${fileName}: ${message}`)
66
this.fileName = fileName
77
const expl = Array.isArray(explanation)
8-
// todo [breaking]: indent with \t
9-
? `\n ${explanation.join('\n\t')}`
8+
? `\n ${explanation.join('\n ')}`
109
: ''
1110
const asString = `${fileName}: ${message}${expl}`
1211
this.toString = () => asString

lib/routes.js

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
'use strict'
22

3+
const DataError = require('./data-error')
4+
35
// Google's "Extended GTFS Route Types"
46
// https://developers.google.com/transit/gtfs/reference/extended-route-types
57
const googleExtendedRouteTypes = [
@@ -288,10 +290,27 @@ COPY "${opt.schema}".routes (
288290
`
289291
}
290292

291-
const formatRoutesRow = (r) => {
293+
const formatRoutesRow = (r, opt, workingState) => {
294+
const agency_id = r.agency_id || null
295+
if (agency_id === null) {
296+
// The GTFS spec allows routes.agency_id to be empty/null if there is exactly one agency in the feed.
297+
// It seems that GTFS has allowed this at least since 2016:
298+
// https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
299+
if (workingState.nrOfRowsByName.get('agency') !== 1) {
300+
// todo: throw special error indicating an error in the input data
301+
throw new DataError(
302+
'routes',
303+
'agency_id must not be empty/null',
304+
[
305+
'The GTFS spec allows routes.agency_id to be empty/null only if there is exactly one agency in the feed.'
306+
],
307+
)
308+
}
309+
}
310+
292311
return [
293312
r.route_id || null,
294-
r.agency_id || null,
313+
agency_id,
295314
r.route_short_name || null,
296315
r.route_long_name || null,
297316
r.route_desc || null,

lib/stop_times.js

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ $$ LANGUAGE SQL IMMUTABLE;
161161
CREATE OR REPLACE VIEW "${opt.schema}".arrivals_departures AS
162162
WITH stop_times_based AS NOT MATERIALIZED (
163163
SELECT
164-
routes.agency_id,
164+
agency.agency_id,
165165
trips.route_id,
166166
route_short_name,
167167
route_long_name,
@@ -212,7 +212,14 @@ WITH stop_times_based AS NOT MATERIALIZED (
212212
LEFT JOIN "${opt.schema}".stops stations ON stops.parent_station = stations.stop_id
213213
JOIN "${opt.schema}".trips ON s.trip_id = trips.trip_id
214214
JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
215-
LEFT JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
215+
LEFT JOIN "${opt.schema}".agency ON (
216+
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
217+
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
218+
-- It seems that GTFS has allowed this at least since 2016:
219+
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
220+
routes.agency_id IS NULL -- match first (and only) agency
221+
OR routes.agency_id = agency.agency_id -- match by ID
222+
)
216223
JOIN "${opt.schema}".service_days ON trips.service_id = service_days.service_id
217224
)
218225
-- todo: this slows down slightly
@@ -422,7 +429,14 @@ WITH stop_times_based AS NOT MATERIALIZED (
422429
to_stations.stop_timezone as to_station_tz
423430
FROM "${opt.schema}".trips
424431
LEFT JOIN "${opt.schema}".routes ON trips.route_id = routes.route_id
425-
LEFT JOIN "${opt.schema}".agency ON routes.agency_id = agency.agency_id
432+
LEFT JOIN "${opt.schema}".agency ON (
433+
-- The GTFS spec allows routes.agency_id to be NULL if there is exactly one agency in the feed.
434+
-- Note: We implicitly rely on other parts of the code base to validate that agency has just one row!
435+
-- It seems that GTFS has allowed this at least since 2016:
436+
-- https://github.com/google/transit/blame/217e9bf/gtfs/spec/en/reference.md#L544-L554
437+
routes.agency_id IS NULL -- match first (and only) agency
438+
OR routes.agency_id = agency.agency_id -- match by ID
439+
)
426440
LEFT JOIN "${opt.schema}".stop_times ON trips.trip_id = stop_times.trip_id
427441
LEFT JOIN "${opt.schema}".stops from_stops ON stop_times.stop_id = from_stops.stop_id
428442
LEFT JOIN "${opt.schema}".stops from_stations ON from_stops.parent_station = from_stations.stop_id

test/calendar-dates-only.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,14 @@ if [[ "$arrN" != "1563629400" ]]; then
3434
echo "invalid 2nd t_arrival: $arrN" 1>&2
3535
exit 1
3636
fi
37+
38+
agency_id_null=$(cat << EOF
39+
select count(*)
40+
from arrivals_departures
41+
where agency_id IS NULL
42+
EOF)
43+
agency_id_null_count="$(psql --csv -t -c "$agency_id_null")"
44+
if [[ "$agency_id_null_count" != "0" ]]; then
45+
echo ">0 rows with agency_id = null" 1>&2
46+
exit 1
47+
fi
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
route_id,agency_id,route_short_name,route_long_name,route_type,route_desc,route_url,route_color,route_text_color,route_sort_order
2-
A,MTA,Ada,Ada Lovelace Bus Line,3,,,,,
2+
A,,Ada,Ada Lovelace Bus Line,3,,,,,

test/index.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@ psql -t -c 'SELECT version()'
1313
./postgraphile.sh
1414
./routes-without-agency-id.sh
1515
./stops-without-level-id.sh
16+
./invalid-empty-agency-id.sh
1617

1718
echo -e "\n\n✔︎ tests passing"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
2+
3+
set -e
4+
set -o pipefail
5+
cd "$(dirname $0)"
6+
set -x
7+
8+
if ../cli.js -d --trips-without-shape-id -s -- \
9+
invalid-empty-agency-id/*.txt >/dev/null; then
10+
echo "import didn't fail" 1>&2
11+
exit 1
12+
else
13+
echo 'import failed ✔'
14+
fi
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone,agency_fare_url,agency_email
2+
MTA,Minimal Transit Agency,https://mta.example.org/,Europe/Berlin,,,,
3+
another,another transit agency,https://another.example.org/,Asia/Kolkata,,,,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
service_id,date,exception_type
2+
some-days,20230713,1

0 commit comments

Comments
(0)