Skip to content

Commit 96081a4

Browse files
committed
Improve job stat history retention policy
Previously a delete query was run on the `bgw_job_stat_history` table to remove job history. This could be slow once the job history table became very large. We now use a temporary table to keep recent history and truncate the job history table instead. We also introduce a `job_history_bsearch` function which finds the ids of the jobs that need to be deleted using binary search. This PR is based on experiments done previously (timescale/timescaledb-extras#47).
1 parent 883c70e commit 96081a4

File tree

9 files changed

+926
-10
lines changed

9 files changed

+926
-10
lines changed

.unreleased/pr_8494

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implements: #8494 Improve job stat history retention policy

sql/job_stat_history_log_retention.sql

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,110 @@
44

55
-- A retention policy is set up for the table _timescaledb_internal.job_errors (Error Log Retention Policy [2])
66
-- By default, it will run once a month and and drop rows older than a month.
7+
8+
-- We use binary search on the id column to figure out which rows should be retained from the job history table
9+
-- Doing it this way allows us to use the index on the `id` column, which (empirically) we found is faster than querying on the `execution_finish` column directly without an index
10+
-- This works because `execution_finish` is always ordered.
11+
-- We can consider alternative approaches to simplify this in the future.
12+
CREATE OR REPLACE FUNCTION _timescaledb_functions.job_history_bsearch(search_point TIMESTAMPTZ) RETURNS BIGINT
13+
AS
14+
$$
15+
DECLARE
16+
id_lower BIGINT;
17+
id_upper BIGINT;
18+
id_middle BIGINT DEFAULT 0;
19+
target_tz TIMESTAMPTZ;
20+
BEGIN
21+
22+
SELECT COALESCE(min(id), 0), COALESCE(max(id), 0)
23+
INTO id_lower, id_upper
24+
FROM _timescaledb_internal.bgw_job_stat_history;
25+
26+
IF id_lower = 0 AND id_upper = 0 THEN
27+
RETURN NULL;
28+
END IF;
29+
30+
-- We want the first entry in the table where execution_finish is >= search_point
31+
WHILE id_lower < id_upper LOOP
32+
id_middle := id_lower + (id_upper - id_lower) / 2;
33+
34+
SELECT execution_finish
35+
INTO target_tz
36+
FROM _timescaledb_internal.bgw_job_stat_history
37+
WHERE id = id_middle;
38+
39+
-- If the id_middle is not found, shift to a previous id that's still in the search space
40+
IF NOT FOUND THEN
41+
SELECT execution_finish, id
42+
INTO target_tz, id_middle
43+
FROM _timescaledb_internal.bgw_job_stat_history
44+
WHERE id <= id_middle AND id >= id_lower
45+
ORDER BY id LIMIT 1;
46+
47+
IF NOT FOUND THEN
48+
id_middle := id_lower;
49+
END IF;
50+
51+
END IF;
52+
53+
IF target_tz >= search_point THEN
54+
id_upper := id_middle;
55+
ELSE
56+
id_lower := id_middle + 1;
57+
END IF;
58+
END LOOP;
59+
60+
-- Handle the case where no ids need to be deleted and return NULL instead
61+
SELECT execution_finish
62+
INTO target_tz
63+
FROM _timescaledb_internal.bgw_job_stat_history
64+
WHERE id = id_lower;
65+
66+
IF target_tz < search_point THEN
67+
RETURN NULL;
68+
END IF;
69+
70+
RETURN id_lower;
71+
END
72+
$$
73+
LANGUAGE plpgsql SET search_path TO pg_catalog, pg_temp;
74+
775
CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_stat_history_retention(job_id integer, config JSONB) RETURNS integer
876
LANGUAGE PLPGSQL AS
977
$BODY$
1078
DECLARE
11-
drop_after INTERVAL;
1279
numrows INTEGER;
80+
search_point TIMESTAMPTZ;
81+
id_found BIGINT;
1382
BEGIN
14-
drop_after := config->>'drop_after';
83+
PERFORM set_config('lock_timeout', coalesce(config->>'lock_timeout', '5s'), true /* is local */);
1584

16-
DELETE
17-
FROM _timescaledb_internal.bgw_job_stat_history
18-
WHERE execution_finish < (now() - drop_after);
85+
-- We need to prevent concurrent changes on this table when running this retention job
86+
-- We take an AccessExclusiveLock at the start since we TRUNCATE later
87+
LOCK TABLE _timescaledb_internal.bgw_job_stat_history IN ACCESS EXCLUSIVE MODE;
88+
89+
search_point := now() - (config->>'drop_after')::interval;
90+
91+
id_found := _timescaledb_functions.job_history_bsearch(search_point);
92+
93+
IF id_found IS NULL THEN
94+
RETURN 0;
95+
END IF;
96+
97+
CREATE TEMP TABLE __tmp_bgw_job_stat_history ON COMMIT DROP AS
98+
SELECT * FROM _timescaledb_internal.bgw_job_stat_history
99+
WHERE id >= id_found
100+
ORDER BY id;
101+
102+
TRUNCATE _timescaledb_internal.bgw_job_stat_history;
103+
104+
INSERT INTO _timescaledb_internal.bgw_job_stat_history
105+
SELECT * FROM __tmp_bgw_job_stat_history;
19106

20-
GET DIAGNOSTICS numrows = ROW_COUNT;
107+
GET DIAGNOSTICS numrows = ROW_COUNT;
21108

22-
RETURN numrows;
23-
END;
109+
RETURN numrows;
110+
END
24111
$BODY$ SET search_path TO pg_catalog, pg_temp;
25112

26113
CREATE OR REPLACE FUNCTION _timescaledb_functions.policy_job_stat_history_retention_check(config JSONB) RETURNS VOID
@@ -34,7 +121,7 @@ BEGIN
34121
IF config->>'drop_after' IS NULL THEN
35122
RAISE EXCEPTION 'drop_after interval not provided';
36123
END IF ;
37-
END;
124+
END
38125
$BODY$ SET search_path TO pg_catalog, pg_temp;
39126

40127
INSERT INTO _timescaledb_config.bgw_job (

sql/updates/reverse-dev.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,4 @@ ALTER EXTENSION timescaledb DROP TABLE _timescaledb_catalog.continuous_aggs_mate
252252

253253
DROP TABLE IF EXISTS _timescaledb_catalog.continuous_aggs_materialization_ranges;
254254

255+
DROP FUNCTION _timescaledb_functions.job_history_bsearch(TIMESTAMPTZ);

0 commit comments

Comments
 (0)