-
-
Couldn't load subscription status.
- Fork 2.1k
Delete messages from device_inbox table when deleting device
#10969
Changes from 20 commits
e2fc0ac
12d20b8
9f46c0f
40ee652
09a0b67
a543a27
1994a2a
6b74a0e
26faaff
d42c17c
f484316
1ebfc7a
38ca3c9
ca72c96
33e366d
53ef462
e6784f2
b3cd342
9a849bd
c4e92f3
c17eb78
8e7f8fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix a long-standing bug where messages in the `device_inbox` table for deleted devices would persist indefinitely. Contributed by @dklimpel and @JohannesKleine. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,10 @@ | |
| from synapse.logging.opentracing import log_kv, set_tag, trace | ||
| from synapse.replication.tcp.streams import ToDeviceStream | ||
| from synapse.storage._base import SQLBaseStore, db_to_json | ||
| from synapse.storage.database import DatabasePool | ||
| from synapse.storage.database import DatabasePool, LoggingTransaction | ||
| from synapse.storage.engines import PostgresEngine | ||
| from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator | ||
| from synapse.types import JsonDict | ||
| from synapse.util import json_encoder | ||
| from synapse.util.caches.expiringcache import ExpiringCache | ||
| from synapse.util.caches.stream_change_cache import StreamChangeCache | ||
|
|
@@ -552,6 +553,7 @@ def _add_messages_to_local_device_inbox_txn( | |
|
|
||
| class DeviceInboxBackgroundUpdateStore(SQLBaseStore): | ||
| DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" | ||
| REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" | ||
|
|
||
| def __init__(self, database: DatabasePool, db_conn, hs): | ||
| super().__init__(database, db_conn, hs) | ||
|
|
@@ -567,6 +569,11 @@ def __init__(self, database: DatabasePool, db_conn, hs): | |
| self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox | ||
| ) | ||
|
|
||
| self.db_pool.updates.register_background_update_handler( | ||
| self.REMOVE_DELETED_DEVICES, | ||
| self._remove_deleted_devices_from_device_inbox, | ||
| ) | ||
|
|
||
| async def _background_drop_index_device_inbox(self, progress, batch_size): | ||
| def reindex_txn(conn): | ||
| txn = conn.cursor() | ||
|
|
@@ -579,6 +586,73 @@ def reindex_txn(conn): | |
|
|
||
| return 1 | ||
|
|
||
| async def _remove_deleted_devices_from_device_inbox( | ||
| self, progress: JsonDict, batch_size: int | ||
| ) -> int: | ||
| """A background update that deletes all device_inboxes for deleted devices. | ||
|
|
||
| This should only need to be run once (when users upgrade to v1.45.0) | ||
|
|
||
| Args: | ||
| progress: JsonDict used to store progress of this background update | ||
| batch_size: the maximum number of rows to retrieve in a single select query | ||
|
|
||
| Returns: | ||
| The number of deleted rows | ||
| """ | ||
|
|
||
| def _remove_deleted_devices_from_device_inbox_txn( | ||
| txn: LoggingTransaction, | ||
| ) -> int: | ||
|
|
||
| sql = """ | ||
| WITH get_devices AS | ||
| (SELECT device_inbox.device_id, device_inbox.user_id | ||
| FROM device_inbox | ||
| WHERE (device_inbox.device_id, device_inbox.user_id) | ||
| NOT IN | ||
| (SELECT device_id, user_id FROM devices) | ||
| LIMIT ?) | ||
| SELECT DISTINCT * FROM get_devices; | ||
| """ | ||
|
|
||
| txn.execute(sql, (batch_size,)) | ||
| rows = txn.fetchall() | ||
|
|
||
| num_deleted = 0 | ||
| for row in rows: | ||
| num_deleted += self.db_pool.simple_delete_txn( | ||
| txn, | ||
| "device_inbox", | ||
| {"device_id": row[0], "user_id": row[1]}, | ||
| ) | ||
|
|
||
| if rows: | ||
| self.db_pool.updates._background_update_progress_txn( | ||
| txn, | ||
| self.REMOVE_DELETED_DEVICES, | ||
| {"device_id": rows[-1][0], "user_id": rows[-1][1]}, | ||
| ) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure if it necessary to call the update pogress. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you run my explain query on your database to get a better cost comparison? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See: #10969 (comment) |
||
|
|
||
| return num_deleted | ||
|
|
||
| number_deleted = await self.db_pool.runInteraction( | ||
| "_remove_deleted_devices_from_device_inbox", | ||
| _remove_deleted_devices_from_device_inbox_txn, | ||
| ) | ||
|
|
||
| # The task is finished when no more lines are deleted. | ||
| # The `batch_size` only specifies how many devices are cleaned per run. | ||
| # More than one line is deleted in the deviceinbox per run and device, | ||
| # so it is possible that the number of deleted lines is larger | ||
| # than the batch size. | ||
| if not number_deleted: | ||
| await self.db_pool.updates._end_background_update( | ||
| self.REMOVE_DELETED_DEVICES | ||
| ) | ||
|
|
||
| return number_deleted | ||
|
|
||
|
|
||
| class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): | ||
| pass | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| /* Copyright 2021 The Matrix.org Foundation C.I.C | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
|
|
||
| --- Remove messages from the device_inbox table which where sent to an | ||
| --- allready deleted device. | ||
| --- This schould run as background task, it may take a little bit longer | ||
| --- to finish. | ||
dklimpel marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| INSERT INTO background_updates (ordering, update_name, progress_json) VALUES | ||
| (6402, 'remove_deleted_devices_from_device_inbox', '{}'); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| # Copyright 2021 The Matrix.org Foundation C.I.C. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the 'License'); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an 'AS IS' BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| from synapse.rest import admin | ||
| from synapse.rest.client import devices | ||
|
|
||
| from tests.unittest import HomeserverTestCase | ||
|
|
||
|
|
||
| class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): | ||
|
|
||
| servlets = [ | ||
| admin.register_servlets, | ||
| devices.register_servlets, | ||
| ] | ||
|
|
||
| def prepare(self, reactor, clock, hs): | ||
| self.store = hs.get_datastore() | ||
| self.user_id = self.register_user("foo", "pass") | ||
|
|
||
| def test_background_remove_deleted_devices_from_device_inbox(self): | ||
| """Test that the background task to delete old device_inboxes works properly.""" | ||
|
|
||
| # create a valid device | ||
| self.get_success( | ||
| self.store.store_device(self.user_id, "cur_device", "display_name") | ||
| ) | ||
|
|
||
| # Add device_inbox to devices | ||
| self.get_success( | ||
| self.store.db_pool.simple_insert( | ||
| "device_inbox", | ||
| { | ||
| "user_id": self.user_id, | ||
| "device_id": "cur_device", | ||
| "stream_id": 1, | ||
| "message_json": "{}", | ||
| }, | ||
| ) | ||
| ) | ||
| self.get_success( | ||
| self.store.db_pool.simple_insert( | ||
| "device_inbox", | ||
| { | ||
| "user_id": self.user_id, | ||
| "device_id": "old_device", | ||
| "stream_id": 2, | ||
| "message_json": "{}", | ||
| }, | ||
| ) | ||
| ) | ||
|
|
||
| # Insert and run the background update. | ||
| self.get_success( | ||
| self.store.db_pool.simple_insert( | ||
| "background_updates", | ||
| { | ||
| "update_name": "remove_deleted_devices_from_device_inbox", | ||
| "progress_json": "{}", | ||
| }, | ||
| ) | ||
| ) | ||
|
|
||
| # ... and tell the DataStore that it hasn't finished all updates yet | ||
| self.store.db_pool.updates._all_done = False | ||
|
|
||
| self.wait_for_background_updates() | ||
|
|
||
| # Make sure the background task deleted old device_inbox | ||
| res = self.get_success( | ||
| self.store.db_pool.simple_select_onecol( | ||
| table="device_inbox", | ||
| keyvalues={}, | ||
| retcol="device_id", | ||
| desc="get_device_id_from_device_inbox", | ||
| ) | ||
| ) | ||
| self.assertEqual(1, len(res)) | ||
| self.assertEqual(res[0], "cur_device") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My concern with this approach is that it starts off fast (as it will quickly find rows to delete), but will become increasingly heavyweight until at the end it will do a full sequential scan of
device_inbox.I think a simple fix for that is to order by
stream_idand then track the lateststream_idthat was reached (via theprogressjson):(Note that since
stream_idis not unique we need to use an inclusivestream_id >= ?clause, since we might not have deleted all dead device messages for thestream_idreturned from the previous query)I would also then delete only rows matching the
(user_id, device_id, stream_id)tuple, to avoid problems of deleting a large number of rows all at once due to a single device having lots of device messages (which I think is common).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. I work on it. This is similar to my first approach in a543a27#diff-3811c5dc2d8444a4922451939dbd64b55056168b6055094c27207cd7ff809552
Also: comment
But we have more informations now.