Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"node_modules"
],
"rules": {
"no-unused-vars": "off",
"no-unused-vars": "warn",
"no-irregular-whitespace": "off"
}
}
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- name: setup Node
uses: actions/setup-node@v4
with:
node-version: 20.x
node-version: 22.x
- run: npm install

- name: build binaries
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: setup Node
# v1 of this action is long outdated; use the same major version as the
# publish and test workflows.
uses: actions/setup-node@v4
with:
node-version: '20.x'
node-version: '22.x'
- name: install sponge (moreutils)
run: sudo apt install -y moreutils

Expand Down
53 changes: 3 additions & 50 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,8 @@ jobs:
strategy:
matrix:
node-version:
- '20.x'
- '22.x'
- '24.x'
postgis-docker-tag:
- '14-3.5-alpine'
- '15-3.5-alpine'
- '16-3.5-alpine'
- '17-3.5-alpine'

steps:
- name: checkout
Expand All @@ -33,55 +27,14 @@ jobs:
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- name: install sponge (moreutils)
run: sudo apt install -y moreutils

- name: install & start PostgreSQL with PostGIS
# todo: currently, it uses mdillon, which doesn't have PostgreSQL 14
# uses: huaxk/postgis-action@v1
# with:
# postgresql version: '${{ matrix.postgis-docker-tag }}'
# postgresql password: password
# postgresql user: postgres
# postgresql db: postgres
- name: install DuckDB
run: |
docker run -d \
-e POSTGRES_USER=$PGUSER -e POSTGRES_PASSWORD=$PGPASSWORD -e POSTGRES_DB=$PGDATABASE \
-p 5432:5432 postgis/postgis:${{ matrix.postgis-docker-tag }} \
-c timezone=Europe/Berlin
env:
PGUSER: postgres
PGPASSWORD: password
PGDATABASE: postgres

- name: install PostgREST
run: |
set -euo pipefail
set -x
dl_url="$(
curl -fsSL \
-H "User-Agent: $user_agent" \
-H 'Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \
'https://api.github.com/repos/PostgREST/postgrest/releases/latest' \
| jq -rc '.assets[] | select(.name | test("linux-static-x86-64")) | .browser_download_url'
)"
wget -nv -U "$user_agent" \
--header='Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \
-O /tmp/postgrest.tar.xz \
"$dl_url"
tar -C /usr/local/bin -J -x postgrest </tmp/postgrest.tar.xz
/usr/local/bin/postgrest --version
env:
user_agent: 'public-transport/gtfs-via-postgres CI'
# Download and run the DuckDB CLI install script. `-A` sets the User-Agent
# header; `-U` is curl's --proxy-user option and would not identify this CI run.
curl -fsSL -A '${{ github.repository }} CI' 'https://install.duckdb.org' | sh
# `export PATH=…` would only affect this step's shell. Appending the directory
# to $GITHUB_PATH makes it part of PATH in all subsequent steps (lint, tests).
echo "$HOME/.duckdb/cli/latest" >> "$GITHUB_PATH"

- run: npm install

- run: npm run lint
- name: npm test
run : npm test
env:
PGHOST: localhost
PGPORT: '5432'
PGUSER: postgres
PGPASSWORD: password
PGDATABASE: postgres
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ pnpm-debug.log
/shrinkwrap.yaml

/test/amtrak-gtfs-2021-10-06
/test/*.duckdb

/*.gtfs
/*.gtfs.zip
/*.gtfs.tar.gz
/*.gtfs.tar.zst

/*.duckdb
/*.duckdb.gz
/*.duckdb.br
17 changes: 6 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
FROM node:alpine
LABEL org.opencontainers.image.title="gtfs-via-postgres"
LABEL org.opencontainers.image.description="Process GTFS using PostgreSQL."
LABEL org.opencontainers.image.title="gtfs-via-duckdb"
LABEL org.opencontainers.image.description="Analyze GTFS datasets using DuckDB."
LABEL org.opencontainers.image.authors="Jannis R <[email protected]>"
LABEL org.opencontainers.image.documentation="https://github.com/public-transport/gtfs-via-postgres"
LABEL org.opencontainers.image.source="https://github.com/public-transport/gtfs-via-postgres"
LABEL org.opencontainers.image.revision="4.0.0"
LABEL org.opencontainers.image.documentation="https://github.com/public-transport/gtfs-via-duckdb"
LABEL org.opencontainers.image.source="https://github.com/public-transport/gtfs-via-duckdb"
LABEL org.opencontainers.image.revision="5.0.0"
LABEL org.opencontainers.image.licenses="(Apache-2.0 AND Prosperity-3.0.0)"

WORKDIR /app

# Both moreutils (providing sponge) and postgresql-client (providing psql) are not required but come in handy for users.
RUN apk add --no-cache \
postgresql-client \
moreutils

ADD package.json /app
RUN npm install --production && npm cache clean --force

ADD . /app
RUN ln -s /app/cli.js /usr/local/bin/gtfs-via-postgres
RUN ln -s /app/cli.js /usr/local/bin/gtfs-via-duckdb

VOLUME /gtfs
WORKDIR /gtfs
Expand Down
2 changes: 1 addition & 1 deletion LICENSE-APACHE
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@

END OF TERMS AND CONDITIONS

Copyright 2020 gtfs-via-postgres contributors
Copyright 2020 gtfs-via-postgres & gtfs-via-duckdb contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion LICENSE-PROSPERITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Contributor: Jannis R

Source Code: https://github.com/public-transport/gtfs-via-postgres
Source Code: https://github.com/public-transport/gtfs-via-duckdb

## Purpose

Expand Down
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_route_name_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM arrivals_departures
WHERE route_short_name = 'S1'
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02'::timestamp with time zone)
AND date <= dates_filter_max('2025-05-27T07:30:00+02'::timestamp with time zone)
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_station_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM arrivals_departures
WHERE station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
6 changes: 6 additions & 0 deletions benchmark/arrs_deps_by_station_and_time_manual.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Benchmark query: arrivals/departures at one station within a 20-minute
-- departure window. Unlike arrs_deps_by_station_and_time.sql, the `date`
-- bounds are written out by hand instead of calling dates_filter_min/max.
SELECT *
FROM arrivals_departures
WHERE station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= '2025-05-25'
AND date <= '2025-05-27'
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_station_and_time_seq_0.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT *
FROM arrivals_departures
WHERE station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
AND stop_sequence = 0
7 changes: 7 additions & 0 deletions benchmark/arrs_deps_by_station_and_time_seq_0_manual.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Benchmark query: arrivals/departures at one station within a 20-minute
-- departure window, restricted to rows with stop_sequence = 0. Unlike
-- arrs_deps_by_station_and_time_seq_0.sql, the `date` bounds are written out
-- by hand instead of calling dates_filter_min/max.
SELECT *
FROM arrivals_departures
WHERE station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= '2025-05-25'
AND date <= '2025-05-27'
AND stop_sequence = 0
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_stop_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM arrivals_departures
WHERE stop_id = 'de:11000:900100001::4' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
6 changes: 6 additions & 0 deletions benchmark/arrs_deps_by_stop_and_time_manual.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Benchmark query: arrivals/departures at one stop within a 20-minute
-- departure window. Unlike arrs_deps_by_stop_and_time.sql, the `date` bounds
-- are written out by hand instead of calling dates_filter_min/max.
SELECT *
FROM arrivals_departures
WHERE stop_id = 'de:11000:900100001::4' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= '2025-05-25'
AND date <= '2025-05-27'
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_time.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT *
FROM arrivals_departures
WHERE t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02'::timestamp with time zone)
AND date <= dates_filter_max('2022-08-09T07:30+02'::timestamp with time zone)
WHERE t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND "date" >= dates_filter_min('2025-05-27T07:10:00+02'::timestamp with time zone)
AND "date" <= dates_filter_max('2025-05-27T07:30:00+02'::timestamp with time zone)
6 changes: 3 additions & 3 deletions benchmark/arrs_deps_by_time_manual.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT *
FROM arrivals_departures
WHERE t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= '2022-08-08'
AND date <= '2022-08-09'
WHERE t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= '2025-05-25'
AND date <= '2025-05-27'
4 changes: 2 additions & 2 deletions benchmark/arrs_deps_by_trip_and_date.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT *
FROM arrivals_departures
WHERE trip_id = '168977951'
AND date > '2022-08-08' AND date <= '2022-08-09'
WHERE trip_id = '262623609' -- route_id=10144_109, route_short_name=S2
AND date = '2025-05-27'
56 changes: 24 additions & 32 deletions benchmark/as-md.js
Original file line number Diff line number Diff line change
@@ -1,39 +1,31 @@
#!/usr/bin/env node

const {pipeline, Transform} = require('stream')
const csvParser = require('csv-parser')
const {ok} = require('assert')
const {createInterface} = require('node:readline')

let firstRow = true
const linewise = createInterface({
input: process.stdin,
// Note: We use the crlfDelay option to recognize all instances of CR LF as a single line break.
crlfDelay: Infinity,
})

pipeline(
process.stdin,
csvParser(),
new Transform({
objectMode: true,
transform: function (row, _, cb) {
if (firstRow) {
firstRow = false
;(async () => {
let firstRow = true
for await (const line of linewise) {
const row = JSON.parse(line)

const keys = Object.keys(row).filter(key => key !== 'filename')
process.stdout.write(`| ${keys.join(' | ')} |\n`)
process.stdout.write(`| ${keys.map(_ => '-').join(' | ')} |\n`)
}
if (firstRow) {
firstRow = false

const formattedVals = Object.entries(row)
.map(([key, val]) => {
if (key === 'query') return '<pre>' + val.replace(/\n/g, '<br>') + '</pre>'
return val
})
process.stdout.write(`| ${formattedVals.join(' | ')} |\n`)
// Header row: one cell per column of the first record, except `filename`.
const keys = Object.keys(row).filter(key => key !== 'filename')
process.stdout.write(`| ${keys.join(' | ')} |\n`)
// Markdown separator row: one `-` cell per header column. The callback takes
// no parameter, which avoids the `no-unused-vars` lint warning.
process.stdout.write(`| ${keys.map(() => '-').join(' | ')} |\n`)

Check warning on line 21 in benchmark/as-md.js

View workflow job for this annotation

GitHub Actions / run tests (24.x)

'_' is defined but never used

Check warning on line 21 in benchmark/as-md.js

View workflow job for this annotation

GitHub Actions / run tests (22.x)

'_' is defined but never used

Check warning on line 21 in benchmark/as-md.js

View workflow job for this annotation

GitHub Actions / run tests (22.x)

'_' is defined but never used
}

cb()
},
}),
process.stdout,
(err) => {
if (!err) return;
console.error(err)
process.exit(1)
},
)
// Render one table row. `filename` is skipped here too, so that the cells
// line up with the header row, which omits that column.
const formattedVals = Object.entries(row)
	.filter(([key]) => key !== 'filename')
	.map(([key, val]) => {
		// Keep multi-line SQL inside a single table cell.
		if (key === 'query') return '<pre>' + val.replace(/\n/g, '<br>') + '</pre>'
		// Round non-integer numbers to 2 decimal places; pass everything else through.
		return typeof val === 'number' && !Number.isInteger(val) ? Math.round(val * 100) / 100 : val
	})
process.stdout.write(`| ${formattedVals.join(' | ')} |\n`)
}
})()
6 changes: 3 additions & 3 deletions benchmark/connections_by_route_name_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM connections
WHERE route_short_name = 'S1'
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
8 changes: 4 additions & 4 deletions benchmark/connections_by_station_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM connections
WHERE from_station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
WHERE from_station_id = 'de:11000:900194006' -- S Schöneweide/Sterndamm (Berlin)
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
10 changes: 5 additions & 5 deletions benchmark/connections_by_station_and_time_seq_0.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT *
FROM connections
WHERE from_station_id = 'de:11000:900100001' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND from_stop_sequence = 0
WHERE from_station_id = 'de:11000:900194006' -- S Schöneweide/Sterndamm (Berlin)
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
AND from_stop_sequence_consec = 0
6 changes: 3 additions & 3 deletions benchmark/connections_by_stop_and_time.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT *
FROM connections
WHERE from_stop_id = 'de:11000:900100001::4' -- S+U Friedrichstr. (Berlin)
AND t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02')
AND date <= dates_filter_max('2022-08-09T07:30+02')
AND t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02')
AND date <= dates_filter_max('2025-05-27T07:30:00+02')
6 changes: 3 additions & 3 deletions benchmark/connections_by_time.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT *
FROM connections
WHERE t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= dates_filter_min('2022-08-09T07:10+02'::timestamp with time zone)
AND date <= dates_filter_max('2022-08-09T07:30+02'::timestamp with time zone)
WHERE t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= dates_filter_min('2025-05-27T07:10:00+02'::timestamp with time zone)
AND date <= dates_filter_max('2025-05-27T07:30:00+02'::timestamp with time zone)
ORDER BY t_departure
LIMIT 100
5 changes: 2 additions & 3 deletions benchmark/connections_by_time_manual.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
SELECT *
FROM connections
WHERE t_departure >= '2022-08-09T07:10+02' AND t_departure <= '2022-08-09T07:30+02'
AND date >= '2022-08-08'
AND date <= '2022-08-09'
WHERE t_departure >= '2025-05-27T07:10:00+02' AND t_departure <= '2025-05-27T07:30:00+02'
AND date >= '2025-05-25' AND date <= '2025-05-27'
ORDER BY t_departure
LIMIT 100
4 changes: 2 additions & 2 deletions benchmark/connections_by_trip_and_date.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SELECT *
FROM connections
WHERE trip_id = '168977951'
AND date > '2022-08-08' AND date <= '2022-08-09'
WHERE trip_id = '262535123' -- route_id=17452_900 (M4)
AND date >= '2025-05-26' AND date <= '2025-06-01'
Loading
Loading