Sliding sync: various fixes to background update (#17636)

Follows on from #17512, other fixes include: #17633, #17634, #17635
This commit is contained in:
Erik Johnston 2024-09-01 10:18:45 +01:00 committed by GitHub
parent ca69d0f571
commit d52c17ce01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 59 additions and 16 deletions

1
changelog.d/17636.misc Normal file
View file

@ -0,0 +1 @@
Pre-populate room data used in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint for quick filtering/sorting.

View file

@ -1861,7 +1861,7 @@ class PersistEventsStore:
VALUES ( VALUES (
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
(SELECT stream_ordering FROM events WHERE event_id = ?), (SELECT stream_ordering FROM events WHERE event_id = ?),
(SELECT instance_name FROM events WHERE event_id = ?) (SELECT COALESCE(instance_name, 'master') FROM events WHERE event_id = ?)
{("," + ", ".join("?" for _ in sliding_sync_snapshot_values)) if sliding_sync_snapshot_values else ""} {("," + ", ".join("?" for _ in sliding_sync_snapshot_values)) if sliding_sync_snapshot_values else ""}
) )
ON CONFLICT (room_id, user_id) ON CONFLICT (room_id, user_id)

View file

@ -41,6 +41,7 @@ from synapse.storage.databases.main.events import (
SlidingSyncMembershipSnapshotSharedInsertValues, SlidingSyncMembershipSnapshotSharedInsertValues,
SlidingSyncStateInsertValues, SlidingSyncStateInsertValues,
) )
from synapse.storage.databases.main.events_worker import DatabaseCorruptionError
from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore
from synapse.storage.types import Cursor from synapse.storage.types import Cursor
@ -1857,6 +1858,7 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
initial_phase = True initial_phase = True
last_room_id = progress.get("last_room_id", "") last_room_id = progress.get("last_room_id", "")
last_user_id = progress.get("last_user_id", "")
last_event_stream_ordering = progress["last_event_stream_ordering"] last_event_stream_ordering = progress["last_event_stream_ordering"]
def _find_memberships_to_update_txn( def _find_memberships_to_update_txn(
@ -1887,11 +1889,11 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
FROM local_current_membership AS c FROM local_current_membership AS c
INNER JOIN events AS e USING (event_id) INNER JOIN events AS e USING (event_id)
LEFT JOIN rooms AS r ON (c.room_id = r.room_id) LEFT JOIN rooms AS r ON (c.room_id = r.room_id)
WHERE c.room_id > ? WHERE (c.room_id, c.user_id) > (?, ?)
ORDER BY c.room_id ASC ORDER BY c.room_id ASC, c.user_id ASC
LIMIT ? LIMIT ?
""", """,
(last_room_id, batch_size), (last_room_id, last_user_id, batch_size),
) )
elif last_event_stream_ordering is not None: elif last_event_stream_ordering is not None:
# It's important to sort by `event_stream_ordering` *ascending* (oldest to # It's important to sort by `event_stream_ordering` *ascending* (oldest to
@ -1993,6 +1995,7 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
WHERE WHERE
room_id = ? room_id = ?
AND m.user_id = ? AND m.user_id = ?
AND (m.membership = ? OR m.membership = ?)
AND e.event_id != ? AND e.event_id != ?
ORDER BY e.topological_ordering DESC ORDER BY e.topological_ordering DESC
LIMIT 1 LIMIT 1
@ -2000,6 +2003,8 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
( (
room_id, room_id,
user_id, user_id,
Membership.INVITE,
Membership.KNOCK,
event_id, event_id,
), ),
) )
@ -2081,9 +2086,17 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
# have `current_state_events` and we should have some current state # have `current_state_events` and we should have some current state
# for each room # for each room
if current_state_ids_map: if current_state_ids_map:
fetched_events = await self.get_events( try:
current_state_ids_map.values() fetched_events = await self.get_events(
) current_state_ids_map.values()
)
except DatabaseCorruptionError as e:
logger.warning(
"Failed to fetch state for room '%s' due to corrupted events. Ignoring. Error: %s",
room_id,
e,
)
continue
current_state_map: StateMap[EventBase] = { current_state_map: StateMap[EventBase] = {
state_key: fetched_events[event_id] state_key: fetched_events[event_id]
@ -2124,7 +2137,7 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
False False
) )
elif membership in (Membership.INVITE, Membership.KNOCK) or ( elif membership in (Membership.INVITE, Membership.KNOCK) or (
membership == Membership.LEAVE and is_outlier membership in (Membership.LEAVE, Membership.BAN) and is_outlier
): ):
invite_or_knock_event_id = membership_event_id invite_or_knock_event_id = membership_event_id
invite_or_knock_membership = membership invite_or_knock_membership = membership
@ -2135,7 +2148,7 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
# us a consistent view of the room state regardless of your # us a consistent view of the room state regardless of your
# membership (i.e. the room shouldn't disappear if you're using the # membership (i.e. the room shouldn't disappear if you're using the
# `is_encrypted` filter and you leave). # `is_encrypted` filter and you leave).
if membership == Membership.LEAVE and is_outlier: if membership in (Membership.LEAVE, Membership.BAN) and is_outlier:
invite_or_knock_event_id, invite_or_knock_membership = ( invite_or_knock_event_id, invite_or_knock_membership = (
await self.db_pool.runInteraction( await self.db_pool.runInteraction(
"sliding_sync_membership_snapshots_bg_update._find_previous_membership", "sliding_sync_membership_snapshots_bg_update._find_previous_membership",
@ -2182,7 +2195,15 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
await_full_state=False, await_full_state=False,
) )
fetched_events = await self.get_events(state_ids_map.values()) try:
fetched_events = await self.get_events(state_ids_map.values())
except DatabaseCorruptionError as e:
logger.warning(
"Failed to fetch state for room '%s' due to corrupted events. Ignoring. Error: %s",
room_id,
e,
)
continue
state_map: StateMap[EventBase] = { state_map: StateMap[EventBase] = {
state_key: fetched_events[event_id] state_key: fetched_events[event_id]
@ -2296,7 +2317,7 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
( (
room_id, room_id,
_room_id_from_rooms_table, _room_id_from_rooms_table,
_user_id, user_id,
_sender, _sender,
_membership_event_id, _membership_event_id,
_membership, _membership,
@ -2308,8 +2329,11 @@ class EventsBackgroundUpdatesStore(StreamWorkerStore, StateDeltasStore, SQLBaseS
progress = { progress = {
"initial_phase": initial_phase, "initial_phase": initial_phase,
"last_room_id": room_id, "last_room_id": room_id,
"last_event_stream_ordering": membership_event_stream_ordering, "last_user_id": user_id,
"last_event_stream_ordering": last_event_stream_ordering,
} }
if not initial_phase:
progress["last_event_stream_ordering"] = membership_event_stream_ordering
await self.db_pool.updates._background_update_progress( await self.db_pool.updates._background_update_progress(
_BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE, _BackgroundUpdates.SLIDING_SYNC_MEMBERSHIP_SNAPSHOTS_BG_UPDATE,

View file

@ -98,6 +98,26 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class DatabaseCorruptionError(RuntimeError):
    """We found an event in the DB that has a persisted event ID that doesn't
    match its computed event ID.

    The three constructor arguments are kept as attributes so that callers
    catching this error can inspect them without parsing the message.

    Attributes:
        room_id: The room the offending event is in.
        persisted_event_id: The event ID the event is persisted under in the
            database.
        computed_event_id: The event ID calculated for the event, which
            differs from `persisted_event_id`.
    """

    def __init__(
        self, room_id: str, persisted_event_id: str, computed_event_id: str
    ) -> None:
        self.room_id = room_id
        self.persisted_event_id = persisted_event_id
        self.computed_event_id = computed_event_id

        # Build the human-readable message once and hand it to `RuntimeError`
        # so that `str(e)` and log output carry the full context.
        message = (
            f"Database corruption: Event {persisted_event_id} in room {room_id} "
            f"from the database appears to have been modified (calculated "
            f"event id {computed_event_id})"
        )

        super().__init__(message)
# These values are used in the `enqueue_event` and `_fetch_loop` methods to # These values are used in the `enqueue_event` and `_fetch_loop` methods to
# control how we batch/bulk fetch events from the database. # control how we batch/bulk fetch events from the database.
# The values are plucked out of thin air to make initial sync run faster # The values are plucked out of thin air to make initial sync run faster
@ -1364,10 +1384,8 @@ class EventsWorkerStore(SQLBaseStore):
if original_ev.event_id != event_id: if original_ev.event_id != event_id:
# it's difficult to see what to do here. Pretty much all bets are off # it's difficult to see what to do here. Pretty much all bets are off
# if Synapse cannot rely on the consistency of its database. # if Synapse cannot rely on the consistency of its database.
raise RuntimeError( raise DatabaseCorruptionError(
f"Database corruption: Event {event_id} in room {d['room_id']} " d["room_id"], event_id, original_ev.event_id
f"from the database appears to have been modified (calculated "
f"event id {original_ev.event_id})"
) )
event_map[event_id] = original_ev event_map[event_id] = original_ev