Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit aed30b7

Browse files
committed
Migrate stream_ordering to a bigint
1 parent 9cfb3dd commit aed30b7

File tree

3 files changed

+144
-1
lines changed

3 files changed

+144
-1
lines changed

synapse/storage/databases/main/events_bg_updates.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,23 @@
2929
logger = logging.getLogger(__name__)
3030

3131

32+
# SQL statements which, run in order inside one transaction, complete the
# migration of events.stream_ordering to a BIGINT: they are executed by
# _background_replace_stream_ordering_column once stream_ordering2 has been
# fully populated and indexed. Postgres-only (the rule and ALTER syntax are
# postgres-specific).
#
# NOTE(review): the constant name is missing an "E" ("ORDRING"); left as-is
# here since it is referenced elsewhere in this file.
_REPLACE_STREAM_ORDRING_SQL_COMMANDS = (
    # there should be no leftover rows without a stream_ordering2, but just in case...
    "UPDATE events SET stream_ordering2 = stream_ordering WHERE stream_ordering2 IS NULL",
    # finally, we can drop the rule and switch the columns
    "DROP RULE populate_stream_ordering2 ON events",
    "ALTER TABLE events DROP COLUMN stream_ordering",
    "ALTER TABLE events RENAME COLUMN stream_ordering2 TO stream_ordering",
    # NOTE(review): if the original stream_ordering column carried a NOT NULL
    # constraint, it is not re-applied to the renamed column here — confirm
    # whether an "ALTER COLUMN ... SET NOT NULL" step is needed.
)
40+
41+
3242
class _BackgroundUpdates:
    """Names of the background updates handled by this store.

    Kept as constants so that the registration calls and the handlers refer
    to the same strings.
    """

    EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts"
    EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url"
    DELETE_SOFT_FAILED_EXTREMITIES = "delete_soft_failed_extremities"

    # The three stages of the stream_ordering -> BIGINT migration (postgres
    # only): populate the new column, build a unique index on it, then swap
    # it into place. Each stage depends on the previous one completing.
    POPULATE_STREAM_ORDERING2 = "populate_stream_ordering2"
    INDEX_STREAM_ORDERING2 = "index_stream_ordering2"
    REPLACE_STREAM_ORDERING_COLUMN = "replace_stream_ordering_column"
3649

3750

3851
@attr.s(slots=True, frozen=True)
@@ -142,6 +155,24 @@ def __init__(self, database: DatabasePool, db_conn, hs):
142155
self._purged_chain_cover_index,
143156
)
144157

158+
# bg updates for replacing stream_ordering with a BIGINT
159+
# (these only run on postgres.)
160+
self.db_pool.updates.register_background_update_handler(
161+
_BackgroundUpdates.POPULATE_STREAM_ORDERING2,
162+
self._background_populate_stream_ordering2,
163+
)
164+
self.db_pool.updates.register_background_index_update(
165+
_BackgroundUpdates.INDEX_STREAM_ORDERING2,
166+
index_name="events_stream_ordering",
167+
table="events",
168+
columns=["stream_ordering2"],
169+
unique=True,
170+
)
171+
self.db_pool.updates.register_background_update_handler(
172+
_BackgroundUpdates.REPLACE_STREAM_ORDERING_COLUMN,
173+
self._background_replace_stream_ordering_column,
174+
)
175+
145176
async def _background_reindex_fields_sender(self, progress, batch_size):
146177
target_min_stream_id = progress["target_min_stream_id_inclusive"]
147178
max_stream_id = progress["max_stream_id_exclusive"]
@@ -1012,3 +1043,75 @@ def purged_chain_cover_txn(txn) -> int:
10121043
await self.db_pool.updates._end_background_update("purged_chain_cover")
10131044

10141045
return result
1046+
1047+
async def _background_populate_stream_ordering2(
    self, progress: JsonDict, batch_size: int
) -> int:
    """Populate events.stream_ordering2, in preparation for replacing
    stream_ordering with it.

    This is to deal with the fact that stream_ordering was initially created
    as a 32-bit integer field. The actual column swap is performed separately
    by _background_replace_stream_ordering_column. Only registered (and hence
    only run) on postgres.

    Args:
        progress: persisted progress; "last_stream" is the highest
            stream_ordering already copied (absent on the first pass).
        batch_size: maximum number of rows to copy per transaction.

    Returns:
        Number of rows updated in this batch; 0 once the update is complete.
    """
    # Guard against a zero/negative batch size: LIMIT 0 would update no rows
    # and wrongly mark the update as finished.
    batch_size = max(batch_size, 1)

    def process(txn: Cursor) -> int:
        # On the first pass, start below any possible value: stream_ordering
        # is (at this point) a 32-bit integer, so -2^31 is a safe lower bound.
        last_stream = progress.get("last_stream", -(1 << 31))

        # Select the batch by row count rather than a fixed-width window of
        # orderings: a window of (last_stream, last_stream + batch_size] can
        # land entirely inside a gap in stream_ordering, update zero rows,
        # and terminate the background update early with rows still
        # unpopulated. RETURNING is safe here as this only runs on postgres.
        txn.execute(
            """
            UPDATE events SET stream_ordering2=stream_ordering
            WHERE stream_ordering IN (
                SELECT stream_ordering FROM events WHERE stream_ordering > ?
                ORDER BY stream_ordering LIMIT ?
            )
            RETURNING stream_ordering
            """,
            (last_stream, batch_size),
        )
        rows = txn.fetchall()
        if not rows:
            # nothing left above last_stream: population is complete
            return 0

        # Resume after the highest ordering actually updated (RETURNING rows
        # are not guaranteed to come back in order, hence max()).
        self.db_pool.updates._background_update_progress_txn(
            txn,
            _BackgroundUpdates.POPULATE_STREAM_ORDERING2,
            {"last_stream": max(r[0] for r in rows)},
        )
        return len(rows)

    result = await self.db_pool.runInteraction(
        "_background_populate_stream_ordering2", process
    )

    if result != 0:
        return result

    # an empty batch means we have caught up: mark the update as done
    await self.db_pool.updates._end_background_update(
        _BackgroundUpdates.POPULATE_STREAM_ORDERING2
    )
    return 0
1098+
1099+
async def _background_replace_stream_ordering_column(
    self, progress: JsonDict, batch_size: int
) -> int:
    """Drop the old 'stream_ordering' column and rename 'stream_ordering2'
    into its place.

    Runs every statement in _REPLACE_STREAM_ORDRING_SQL_COMMANDS inside a
    single transaction, then marks this background update as complete.
    Always returns 0 (the whole job is done in one pass).
    """

    def replace_stream_ordering_txn(txn: Cursor) -> None:
        # execute the switchover statements in order, logging each one
        for statement in _REPLACE_STREAM_ORDRING_SQL_COMMANDS:
            logger.info("completing stream_ordering migration: %s", statement)
            txn.execute(statement)

    await self.db_pool.runInteraction(
        "_background_replace_stream_ordering_column", replace_stream_ordering_txn
    )

    await self.db_pool.updates._end_background_update(
        _BackgroundUpdates.REPLACE_STREAM_ORDERING_COLUMN
    )

    return 0

synapse/storage/schema/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
SCHEMA_VERSION = 59
15+
# Bumped 59 -> 60: stream_ordering is migrated to a BIGINT (schema delta 60/01).
SCHEMA_VERSION = 60
1616
"""Represents the expectations made by the codebase about the database schema
1717
1818
This should be incremented whenever the codebase changes its requirements on the
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/* Copyright 2021 The Matrix.org Foundation C.I.C
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

-- This migration handles the process of changing the type of `stream_ordering` to
-- a BIGINT.
--
-- Note that this is only a problem on postgres as sqlite only has one "integer" type
-- which can cope with values up to 2^63.

-- First add a new column to contain the bigger stream_ordering
ALTER TABLE events ADD COLUMN stream_ordering2 BIGINT;

-- Create a rule which will populate it for new rows.
--
-- NOTE(review): postgres rejects `INSERT ... ON CONFLICT` on a table that has
-- INSERT rules — confirm nothing inserts into `events` with an ON CONFLICT
-- clause while this rule exists (a trigger would not have that restriction).
CREATE OR REPLACE RULE "populate_stream_ordering2" AS
    ON INSERT TO events
    DO UPDATE events SET stream_ordering2=NEW.stream_ordering WHERE stream_ordering=NEW.stream_ordering;

-- Start a bg process to populate it for old events
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
  (6001, 'populate_stream_ordering2', '{}');

-- ... and another to build an index on it (deferred until the population
-- update completes, via depends_on)
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
  (6001, 'index_stream_ordering2', '{}', 'populate_stream_ordering2');

-- ... and another to do the switcheroo (runs last, once the index is built)
INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
  (6001, 'replace_stream_ordering_column', '{}', 'index_stream_ordering2');

0 commit comments

Comments
 (0)