22
22
Iterable ,
23
23
List ,
24
24
Optional ,
25
+ Set ,
25
26
Tuple ,
26
27
overload ,
27
28
)
55
56
from synapse .storage .util .id_generators import MultiWriterIdGenerator , StreamIdGenerator
56
57
from synapse .storage .util .sequence import build_sequence_generator
57
58
from synapse .types import JsonDict , get_domain_from_id
58
- from synapse .util .caches .descriptors import cached
59
+ from synapse .util .caches .descriptors import cached , cachedList
59
60
from synapse .util .caches .lrucache import LruCache
60
61
from synapse .util .iterutils import batch_iter
61
62
from synapse .util .metrics import Measure
@@ -1046,7 +1047,7 @@ async def have_events_in_timeline(self, event_ids):
1046
1047
1047
1048
return {r ["event_id" ] for r in rows }
1048
1049
1049
async def have_seen_events(self, event_ids: Collection[str]) -> Set[str]:
    """Given a list of event ids, check if we have already processed them.

    Args:
        event_ids: the events we are looking for.

    Returns:
        The subset of ``event_ids`` that we have already seen.
    """
    # Delegate to the @cachedList-backed helper, which returns a
    # {event_id: seen?} mapping, then keep only the ids marked as seen.
    res = await self._have_seen_events_dict(event_ids)
    return {x for (x, y) in res.items() if y}
@cachedList("have_seen_event", "event_ids")
async def _have_seen_events_dict(
    self, event_ids: Collection[str]
) -> Dict[str, bool]:
    """Helper for have_seen_events.

    Returns a dict mapping each event id to whether we have seen it, which
    is the right format for the @cachedList descriptor.
    """
    # if the event cache contains the event, obviously we've seen it.
    cache_results = {x for x in event_ids if self._get_event_cache.contains(x)}
    results = {x: True for x in cache_results}

    def have_seen_events_txn(txn, chunk):
        # assume everything in this chunk is not found initially
        results.update({x: False for x in chunk})

        # check the db and update the results for any row that is found
        sql = "SELECT event_id FROM events as e WHERE "
        clause, args = make_in_list_sql_clause(
            txn.database_engine, "e.event_id", chunk
        )
        txn.execute(sql + clause, args)
        results.update({row[0]: True for row in txn})

    # Only hit the database for ids that were not already satisfied by the
    # in-memory event cache, in chunks of 100 to bound the size of the
    # generated IN (...) clause.
    for chunk in batch_iter((x for x in event_ids if x not in cache_results), 100):
        await self.db_pool.runInteraction(
            "have_seen_events", have_seen_events_txn, chunk
        )

    return results
@cached(max_entries=100000)
async def have_seen_event(self, event_id: str) -> bool:
    # this only exists for the benefit of the @cachedList descriptor on
    # _have_seen_events_dict: @cachedList needs a matching @cached method to
    # attach its per-entry cache to, but the single-id lookup is never
    # called directly.
    raise NotImplementedError()
1075
1099
def _get_current_state_event_counts_txn (self , txn , room_id ):
1076
1100
"""
1077
1101
See get_current_state_event_counts.
0 commit comments