 )
 from synapse.types.state import StateFilter
 from synapse.util.async_helpers import Linearizer, concurrently_execute
-from synapse.util.iterutils import batch_iter
+from synapse.util.iterutils import batch_iter, partition
 from synapse.util.retryutils import NotRetryingDestination
 from synapse.util.stringutils import shortstr

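For context on the newly imported `partition` helper: the hunk further below uses it to split the pulled events into those that have failed a pull attempt before and those that have not. A minimal sketch of such a helper, assuming it returns the items matching the predicate first and the remainder second (the name matches `synapse.util.iterutils.partition`, but the exact signature here is an assumption rather than a copy of the Synapse source):

    from typing import Callable, Iterable, List, Tuple, TypeVar

    T = TypeVar("T")

    def partition(
        iterable: Iterable[T], predicate: Callable[[T], bool]
    ) -> Tuple[List[T], List[T]]:
        """Split `iterable` into (items matching predicate, items that do not)."""
        matches: List[T] = []
        rest: List[T] = []
        for item in iterable:
            if predicate(item):
                matches.append(item)
            else:
                rest.append(item)
        return matches, rest

    # Usage mirroring the hunk below: the "failed before" items come out first.
    failed, fresh = partition([1, 2, 3, 4], lambda n: n % 2 == 0)
    assert failed == [2, 4] and fresh == [1, 3]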
@@ -865,7 +865,7 @@ async def _process_pulled_events(
             [event.event_id for event in events]
         )

-        new_events = []
+        new_events: List[EventBase] = []
         for event in events:
             event_id = event.event_id

@@ -895,12 +895,66 @@ async def _process_pulled_events(
             str(len(new_events)),
         )

-        # We want to sort these by depth so we process them and
-        # tell clients about them in order.
-        sorted_events = sorted(new_events, key=lambda x: x.depth)
-        for ev in sorted_events:
-            with nested_logging_context(ev.event_id):
-                await self._process_pulled_event(origin, ev, backfilled=backfilled)
+        @trace
+        async def _process_new_pulled_events(new_events: Collection[EventBase]) -> None:
+            # We want to sort these by depth so we process them and tell clients about
+            # them in order. It's also more efficient to backfill this way (`depth`
+            # ascending) because one backfill event is likely to be the `prev_event` of
+            # the next event we're going to process.
+            sorted_events = sorted(new_events, key=lambda x: x.depth)
+            for ev in sorted_events:
+                with nested_logging_context(ev.event_id):
+                    await self._process_pulled_event(origin, ev, backfilled=backfilled)
+
+        # Check if we've already tried to process these events at some point in the
+        # past. We aren't concerned with the exponential backoff here, just whether
+        # they have failed to be processed before.
+        event_ids_with_failed_pull_attempts = (
+            await self._store.get_event_ids_with_failed_pull_attempts(
+                [event.event_id for event in new_events]
+            )
+        )
+
+        # We construct the event lists in source order from the `/backfill` response
+        # because it's a) easiest, but also b) the order in which we process things
+        # matters for MSC2716 historical batches, since many historical events are all
+        # at the same `depth` and we rely on the tenuous sort that the other server gave
+        # us and hope they're doing their best. The brittle nature of this ordering for
+        # historical messages over federation is one of the reasons why we don't want to
+        # continue on MSC2716 until we have online topological ordering.
+        events_with_failed_pull_attempts, fresh_events = partition(
+            new_events, lambda e: e.event_id in event_ids_with_failed_pull_attempts
+        )
+        set_tag(
+            SynapseTags.FUNC_ARG_PREFIX + "events_with_failed_pull_attempts",
+            str(event_ids_with_failed_pull_attempts),
+        )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "events_with_failed_pull_attempts.length",
+            str(len(events_with_failed_pull_attempts)),
+        )
+        set_tag(
+            SynapseTags.FUNC_ARG_PREFIX + "fresh_events",
+            str([event.event_id for event in fresh_events]),
+        )
+        set_tag(
+            SynapseTags.RESULT_PREFIX + "fresh_events.length",
+            str(len(fresh_events)),
+        )
+
+        # Process previously failed backfill events in the background to not waste
+        # time on something that is likely to fail again.
+        if len(events_with_failed_pull_attempts) > 0:
+            run_as_background_process(
+                "_process_new_pulled_events_with_failed_pull_attempts",
+                _process_new_pulled_events,
+                events_with_failed_pull_attempts,
+            )
+
+        # We can optimistically try to process and wait for the event to be fully
+        # persisted if we've never tried before.
+        if len(fresh_events) > 0:
+            await _process_new_pulled_events(fresh_events)

     @trace
     @tag_args
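To make the control flow of the new hunk easier to follow, here is a standalone illustration rather than the Synapse implementation (all names below are made up for the example): events that already have a failed pull attempt are handed to a fire-and-forget background task, while events we have never tried before are processed and awaited inline so the caller still waits for them to be persisted.

    import asyncio
    from typing import List, Set

    async def process(event_id: str) -> None:
        # Stand-in for the per-event processing step.
        await asyncio.sleep(0)
        print("processed", event_id)

    async def process_pulled(event_ids: List[str], previously_failed: Set[str]) -> None:
        # Split the batch the same way the hunk above does with partition().
        failed = [e for e in event_ids if e in previously_failed]
        fresh = [e for e in event_ids if e not in previously_failed]

        async def process_all(ids: List[str]) -> None:
            for event_id in ids:
                await process(event_id)

        # Events that failed before: fire-and-forget, don't block the caller.
        if failed:
            asyncio.create_task(process_all(failed))

        # Events we've never tried: process inline so the caller waits for them.
        if fresh:
            await process_all(fresh)

    async def main() -> None:
        await process_pulled(["$a", "$b"], previously_failed={"$b"})
        # Give the fire-and-forget task a chance to finish before the loop closes
        # (only needed for this toy example).
        await asyncio.sleep(0)

    asyncio.run(main())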