Refactor EventPersistenceQueue (#10145)

Some cleanup, pulled out of #10134.

parent d7808a2dde
commit 1dfdc87b9b

2 changed files with 89 additions and 77 deletions
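In outline, the refactor does the following: the per-item persistence callback moves from handle_queue into the _EventPeristenceQueue constructor; add_to_queue becomes an async method that starts _handle_queue itself and returns the callback's result (awaited via make_deferred_yieldable) rather than a raw deferred; the class-level namedtuple is replaced with a frozen attrs class, _EventPersistQueueItem, and the queue becomes Generic[_PersistResult]; on the caller side, EventsPersistenceStorage drops _maybe_start_persisting and gathers the per-room results with yieldable_gather_results instead of defer.gatherResults. A simplified sketch of the resulting pattern follows the _handle_queue hunk below.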
changelog.d/10145.misc (new file)

@@ -0,0 +1 @@
+Refactor EventPersistenceQueue.
synapse/storage/persist_events.py

@@ -16,9 +16,23 @@
 import itertools
 import logging
-from collections import deque, namedtuple
-from typing import Collection, Dict, Iterable, List, Optional, Set, Tuple
+from collections import deque
+from typing import (
+    Awaitable,
+    Callable,
+    Collection,
+    Deque,
+    Dict,
+    Generic,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    TypeVar,
+)
 
+import attr
 from prometheus_client import Counter, Histogram
 
 from twisted.internet import defer
@@ -37,7 +51,7 @@ from synapse.types import (
     StateMap,
     get_domain_from_id,
 )
-from synapse.util.async_helpers import ObservableDeferred
+from synapse.util.async_helpers import ObservableDeferred, yieldable_gather_results
 from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
@@ -89,25 +103,47 @@ times_pruned_extremities = Counter(
 )
 
 
-class _EventPeristenceQueue:
+@attr.s(auto_attribs=True, frozen=True, slots=True)
+class _EventPersistQueueItem:
+    events_and_contexts: List[Tuple[EventBase, EventContext]]
+    backfilled: bool
+    deferred: ObservableDeferred
+
+
+_PersistResult = TypeVar("_PersistResult")
+
+
+class _EventPeristenceQueue(Generic[_PersistResult]):
     """Queues up events so that they can be persisted in bulk with only one
     concurrent transaction per room.
     """
 
-    _EventPersistQueueItem = namedtuple(
-        "_EventPersistQueueItem", ("events_and_contexts", "backfilled", "deferred")
-    )
-
-    def __init__(self):
-        self._event_persist_queues = {}
-        self._currently_persisting_rooms = set()
+    def __init__(
+        self,
+        per_item_callback: Callable[
+            [List[Tuple[EventBase, EventContext]], bool],
+            Awaitable[_PersistResult],
+        ],
+    ):
+        """Create a new event persistence queue
+
+        The per_item_callback will be called for each item added via add_to_queue,
+        and its result will be returned via the Deferreds returned from add_to_queue.
+        """
+        self._event_persist_queues: Dict[str, Deque[_EventPersistQueueItem]] = {}
+        self._currently_persisting_rooms: Set[str] = set()
+        self._per_item_callback = per_item_callback
 
-    def add_to_queue(self, room_id, events_and_contexts, backfilled):
+    async def add_to_queue(
+        self,
+        room_id: str,
+        events_and_contexts: Iterable[Tuple[EventBase, EventContext]],
+        backfilled: bool,
+    ) -> _PersistResult:
         """Add events to the queue, with the given persist_event options.
 
-        NB: due to the normal usage pattern of this method, it does *not*
-        follow the synapse logcontext rules, and leaves the logcontext in
-        place whether or not the returned deferred is ready.
+        If we are not already processing events in this room, starts off a background
+        process to do so, calling the per_item_callback for each item.
 
         Args:
             room_id (str):
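One detail worth noting from this hunk: _EventPersistQueueItem is declared with frozen=True, yet the next hunk extends end_item.events_and_contexts in place. That is consistent because attrs' frozen=True only prevents rebinding the attributes themselves; a mutable list held by a frozen instance can still be mutated. A small illustrative sketch follows (it requires the third-party attrs package; QueueItem is a made-up stand-in, not Synapse code):

from typing import List, Tuple

import attr


@attr.s(auto_attribs=True, frozen=True, slots=True)
class QueueItem:
    events: List[Tuple[str, str]]  # stand-in for events_and_contexts
    backfilled: bool


item = QueueItem(events=[], backfilled=False)
item.events.extend([("event_a", "ctx_a")])  # allowed: mutates the list, not the attribute
print(item.events)  # [('event_a', 'ctx_a')]

try:
    item.backfilled = True  # rebinding an attribute of a frozen instance raises
except attr.exceptions.FrozenInstanceError:
    print("frozen instances cannot be reassigned")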
@@ -115,38 +151,36 @@ class _EventPeristenceQueue:
             backfilled (bool):
 
         Returns:
-            defer.Deferred: a deferred which will resolve once the events are
-            persisted. Runs its callbacks *without* a logcontext. The result
-            is the same as that returned by the callback passed to
-            `handle_queue`.
+            the result returned by the `_per_item_callback` passed to
+            `__init__`.
         """
         queue = self._event_persist_queues.setdefault(room_id, deque())
-        if queue:
-            # if the last item in the queue has the same `backfilled` setting,
-            # we can just add these new events to that item.
-            end_item = queue[-1]
-            if end_item.backfilled == backfilled:
-                end_item.events_and_contexts.extend(events_and_contexts)
-                return end_item.deferred.observe()
-
-        deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True)
-
-        queue.append(
-            self._EventPersistQueueItem(
-                events_and_contexts=events_and_contexts,
-                backfilled=backfilled,
-                deferred=deferred,
-            )
-        )
-
-        return deferred.observe()
+        # if the last item in the queue has the same `backfilled` setting,
+        # we can just add these new events to that item.
+        if queue and queue[-1].backfilled == backfilled:
+            end_item = queue[-1]
+        else:
+            # need to make a new queue item
+            deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True)
+            end_item = _EventPersistQueueItem(
+                events_and_contexts=[],
+                backfilled=backfilled,
+                deferred=deferred,
+            )
+            queue.append(end_item)
+
+        end_item.events_and_contexts.extend(events_and_contexts)
+
+        self._handle_queue(room_id)
+
+        return await make_deferred_yieldable(end_item.deferred.observe())
 
-    def handle_queue(self, room_id, per_item_callback):
+    def _handle_queue(self, room_id):
         """Attempts to handle the queue for a room if not already being handled.
 
-        The given callback will be invoked for each item in the queue,
+        The queue's callback will be invoked for each item in the queue,
         of type _EventPersistQueueItem. The per_item_callback will continuously
-        be called with new items, unless the queue becomnes empty. The return
+        be called with new items, unless the queue becomes empty. The return
         value of the function will be given to the deferreds waiting on the item,
         exceptions will be passed to the deferreds as well.
 
@@ -156,7 +190,6 @@ class _EventPeristenceQueue:
         If another callback is currently handling the queue then it will not be
         invoked.
         """
-
         if room_id in self._currently_persisting_rooms:
             return
 
@@ -167,7 +200,9 @@ class _EventPeristenceQueue:
                 queue = self._get_drainining_queue(room_id)
                 for item in queue:
                     try:
-                        ret = await per_item_callback(item)
+                        ret = await self._per_item_callback(
+                            item.events_and_contexts, item.backfilled
+                        )
                     except Exception:
                         with PreserveLoggingContext():
                             item.deferred.errback()
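Taken together, the hunks above give the queue its new control flow: the callback is fixed at construction time, add_to_queue merges new events into the last pending item for the room (or creates a new item), kicks _handle_queue, and awaits the item's deferred, whose result is whatever the callback returned. What follows is a simplified, self-contained asyncio sketch of that pattern as read from the diff, not Synapse's actual implementation: plain asyncio Futures stand in for Twisted's ObservableDeferred, there is no logcontext handling or metrics, and all names (BatchQueue, persist, "room1") are made up for illustration.

import asyncio
from collections import deque
from typing import Awaitable, Callable, Deque, Dict, Generic, List, Set, Tuple, TypeVar

R = TypeVar("R")


class BatchQueue(Generic[R]):
    """Per-key batching queue: at most one callback invocation in flight per key."""

    def __init__(self, per_item_callback: Callable[[List[str]], Awaitable[R]]):
        self._queues: Dict[str, Deque[Tuple[List[str], asyncio.Future]]] = {}
        self._busy: Set[str] = set()
        self._per_item_callback = per_item_callback

    async def add_to_queue(self, key: str, items: List[str]) -> R:
        queue = self._queues.setdefault(key, deque())
        if queue:
            # merge into the last pending batch (cf. extending events_and_contexts)
            batch, fut = queue[-1]
        else:
            batch, fut = [], asyncio.get_running_loop().create_future()
            queue.append((batch, fut))
        batch.extend(items)

        self._handle_queue(key)
        # unlike a Twisted Deferred, an asyncio Future can be awaited by several
        # callers, which is roughly what ObservableDeferred provides in Synapse
        return await fut

    def _handle_queue(self, key: str) -> None:
        if key in self._busy:
            return
        self._busy.add(key)

        async def loop() -> None:
            try:
                queue = self._queues[key]
                while queue:
                    batch, fut = queue.popleft()
                    try:
                        fut.set_result(await self._per_item_callback(batch))
                    except Exception as e:
                        fut.set_exception(e)
            finally:
                self._busy.discard(key)

        asyncio.create_task(loop())


async def main() -> None:
    async def persist(batch: List[str]) -> int:
        await asyncio.sleep(0.01)  # stand-in for one database transaction per batch
        return len(batch)

    q: BatchQueue[int] = BatchQueue(persist)
    results = await asyncio.gather(
        q.add_to_queue("room1", ["a"]),
        q.add_to_queue("room1", ["b"]),
    )
    print(results)  # [2, 2]: both callers were merged into the same batch


asyncio.run(main())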
@@ -214,7 +249,7 @@ class EventsPersistenceStorage:
         self._clock = hs.get_clock()
         self._instance_name = hs.get_instance_name()
         self.is_mine_id = hs.is_mine_id
-        self._event_persist_queue = _EventPeristenceQueue()
+        self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch)
         self._state_resolution_handler = hs.get_state_resolution_handler()
 
     async def persist_events(
@@ -241,26 +276,21 @@ class EventsPersistenceStorage:
         for event, ctx in events_and_contexts:
             partitioned.setdefault(event.room_id, []).append((event, ctx))
 
-        deferreds = []
-        for room_id, evs_ctxs in partitioned.items():
-            d = self._event_persist_queue.add_to_queue(
+        async def enqueue(item):
+            room_id, evs_ctxs = item
+            return await self._event_persist_queue.add_to_queue(
                 room_id, evs_ctxs, backfilled=backfilled
             )
-            deferreds.append(d)
 
-        for room_id in partitioned:
-            self._maybe_start_persisting(room_id)
+        ret_vals = await yieldable_gather_results(enqueue, partitioned.items())
 
-        # Each deferred returns a map from event ID to existing event ID if the
-        # event was deduplicated. (The dict may also include other entries if
+        # Each call to add_to_queue returns a map from event ID to existing event ID if
+        # the event was deduplicated. (The dict may also include other entries if
         # the event was persisted in a batch with other events).
         #
-        # Since we use `defer.gatherResults` we need to merge the returned list
+        # Since we use `yieldable_gather_results` we need to merge the returned list
         # of dicts into one.
-        ret_vals = await make_deferred_yieldable(
-            defer.gatherResults(deferreds, consumeErrors=True)
-        )
-        replaced_events = {}
+        replaced_events: Dict[str, str] = {}
         for d in ret_vals:
             replaced_events.update(d)
 
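The caller-side change in persist_events: instead of collecting raw deferreds and handing them to defer.gatherResults, an enqueue coroutine is mapped over the partitioned rooms with yieldable_gather_results (Synapse's logcontext-friendly gather helper), and the per-room dicts are merged afterwards. A rough standard-library analogue of that map-then-merge shape, with asyncio.gather standing in for the Synapse helper and entirely made-up data, looks like:

import asyncio
from typing import Dict, List, Tuple


async def gather_and_merge(partitioned: Dict[str, List[str]]) -> Dict[str, str]:
    async def enqueue(item: Tuple[str, List[str]]) -> Dict[str, str]:
        room_id, evs = item
        await asyncio.sleep(0)  # stand-in for add_to_queue(room_id, evs, ...)
        return {ev: ev + "-existing" for ev in evs}

    # run enqueue over every room concurrently, then merge the per-room dicts
    ret_vals = await asyncio.gather(*(enqueue(i) for i in partitioned.items()))
    replaced_events: Dict[str, str] = {}
    for d in ret_vals:
        replaced_events.update(d)
    return replaced_events


print(asyncio.run(gather_and_merge({"room1": ["a"], "room2": ["b"]})))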
@@ -287,16 +317,12 @@ class EventsPersistenceStorage:
             event if it was deduplicated due to an existing event matching the
             transaction ID.
         """
-        deferred = self._event_persist_queue.add_to_queue(
-            event.room_id, [(event, context)], backfilled=backfilled
-        )
-
-        self._maybe_start_persisting(event.room_id)
-
-        # The deferred returns a map from event ID to existing event ID if the
+        # add_to_queue returns a map from event ID to existing event ID if the
         # event was deduplicated. (The dict may also include other entries if
         # the event was persisted in a batch with other events.)
-        replaced_events = await make_deferred_yieldable(deferred)
+        replaced_events = await self._event_persist_queue.add_to_queue(
+            event.room_id, [(event, context)], backfilled=backfilled
+        )
         replaced_event = replaced_events.get(event.event_id)
         if replaced_event:
             event = await self.main_store.get_event(replaced_event)
@@ -308,29 +334,14 @@ class EventsPersistenceStorage:
         pos = PersistedEventPosition(self._instance_name, event_stream_id)
         return event, pos, self.main_store.get_room_max_token()
 
-    def _maybe_start_persisting(self, room_id: str):
-        """Pokes the `_event_persist_queue` to start handling new items in the
-        queue, if not already in progress.
-
-        Causes the deferreds returned by `add_to_queue` to resolve with: a
-        dictionary of event ID to event ID we didn't persist as we already had
-        another event persisted with the same TXN ID.
-        """
-
-        async def persisting_queue(item):
-            with Measure(self._clock, "persist_events"):
-                return await self._persist_events(
-                    item.events_and_contexts, backfilled=item.backfilled
-                )
-
-        self._event_persist_queue.handle_queue(room_id, persisting_queue)
-
-    async def _persist_events(
+    async def _persist_event_batch(
         self,
         events_and_contexts: List[Tuple[EventBase, EventContext]],
         backfilled: bool = False,
     ) -> Dict[str, str]:
-        """Calculates the change to current state and forward extremities, and
+        """Callback for the _event_persist_queue
+
+        Calculates the change to current state and forward extremities, and
         persists the given events and with those updates.
 
         Returns: