Sliding sync: don't fetch room summary for named rooms. (#17683)

For rooms with a name we can skip fetching a full room summary, as we
don't need to calculate heroes, and instead just fetch the room counts
directly.

This also changes things to not return counts and heroes for non-joined
rooms. For left/banned rooms we were returning zero values anyway, and
for invite/knock rooms we don't really want to leak such information
(even if some of is included in the stripped state).
This commit is contained in:
Erik Johnston 2024-09-11 13:16:57 +01:00 committed by Erik Johnston
parent b732d13d4c
commit 76f7c91e44
7 changed files with 121 additions and 76 deletions

1
changelog.d/17683.misc Normal file
View file

@ -0,0 +1 @@
Speed up sliding sync by reducing amount of data pulled out of the database for large rooms.

View file

@ -784,32 +784,10 @@ class SlidingSyncHandler:
): ):
avatar_changed = True avatar_changed = True
# We only need the room summary for calculating heroes, however if we do
# fetch it then we can use it to calculate `joined_count` and
# `invited_count`.
room_membership_summary: Optional[Mapping[str, MemberSummary]] = None room_membership_summary: Optional[Mapping[str, MemberSummary]] = None
empty_membership_summary = MemberSummary([], 0)
# We need the room summary for:
# - Always for initial syncs (or the first time we send down the room)
# - When the room has no name, we need `heroes`
# - When the membership has changed so we need to give updated `heroes` and
# `joined_count`/`invited_count`.
#
# Ideally, instead of just looking at `name_changed`, we'd check if the room
# name is not set but this is a good enough approximation that saves us from
# having to pull out the full event. This just means, we're generating the
# summary whenever the room name changes instead of only when it changes to
# `None`.
if initial or name_changed or membership_changed:
# We can't trace the function directly because it's cached and the `@cached`
# decorator doesn't mix with `@trace` yet.
with start_active_span("get_room_summary"):
if room_membership_for_user_at_to_token.membership in (
Membership.LEAVE,
Membership.BAN,
):
# TODO: Figure out how to get the membership summary for left/banned rooms
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`
# `heroes` are required if the room name is not set. # `heroes` are required if the room name is not set.
# #
@ -828,11 +806,45 @@ class SlidingSyncHandler:
# get them on initial syncs (or the first time we send down the room) or if the # get them on initial syncs (or the first time we send down the room) or if the
# membership has changed which may change the heroes. # membership has changed which may change the heroes.
if name_event_id is None and (initial or (not initial and membership_changed)): if name_event_id is None and (initial or (not initial and membership_changed)):
assert room_membership_summary is not None # We need the room summary to extract the heroes from
if room_membership_for_user_at_to_token.membership != Membership.JOIN:
# TODO: Figure out how to get the membership summary for left/banned rooms
# For invite/knock rooms we don't include the information.
room_membership_summary = {}
else:
room_membership_summary = await self.store.get_room_summary(room_id)
# TODO: Reverse/rewind back to the `to_token`
hero_user_ids = extract_heroes_from_room_summary( hero_user_ids = extract_heroes_from_room_summary(
room_membership_summary, me=user.to_string() room_membership_summary, me=user.to_string()
) )
# Fetch the membership counts for rooms we're joined to.
#
# Similarly to other metadata, we only need to calculate the member
# counts if this is an initial sync or the memberships have changed.
joined_count: Optional[int] = None
invited_count: Optional[int] = None
if (
initial or membership_changed
) and room_membership_for_user_at_to_token.membership == Membership.JOIN:
# If we have the room summary (because we calculated heroes above)
# then we can simply pull the counts from there.
if room_membership_summary is not None:
empty_membership_summary = MemberSummary([], 0)
joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count
invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count
else:
member_counts = await self.store.get_member_counts(room_id)
joined_count = member_counts.get(Membership.JOIN, 0)
invited_count = member_counts.get(Membership.INVITE, 0)
# Fetch the `required_state` for the room # Fetch the `required_state` for the room
# #
# No `required_state` for invite/knock rooms (just `stripped_state`) # No `required_state` for invite/knock rooms (just `stripped_state`)
@ -1090,20 +1102,6 @@ class SlidingSyncHandler:
set_tag(SynapseTags.RESULT_PREFIX + "initial", initial) set_tag(SynapseTags.RESULT_PREFIX + "initial", initial)
joined_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
joined_count = room_membership_summary.get(
Membership.JOIN, empty_membership_summary
).count
invited_count: Optional[int] = None
if initial or membership_changed:
assert room_membership_summary is not None
invited_count = room_membership_summary.get(
Membership.INVITE, empty_membership_summary
).count
return SlidingSyncResult.RoomResult( return SlidingSyncResult.RoomResult(
name=room_name, name=room_name,
avatar=room_avatar, avatar=room_avatar,

View file

@ -112,6 +112,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache( self._attempt_to_invalidate_cache(
"get_number_joined_users_in_room", (room_id,) "get_number_joined_users_in_room", (room_id,)
) )
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
# There's no easy way of invalidating this cache for just the users # There's no easy way of invalidating this cache for just the users
@ -153,6 +154,7 @@ class SQLBaseStore(metaclass=ABCMeta):
self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,)) self._attempt_to_invalidate_cache("get_current_hosts_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,)) self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,))
self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,)) self._attempt_to_invalidate_cache("get_number_joined_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("get_member_counts", (room_id,))
self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,))
self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None) self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None)
self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None) self._attempt_to_invalidate_cache("get_user_in_room_with_profile", None)

View file

@ -312,18 +312,10 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
# We do this all in one transaction to keep the cache small. # We do this all in one transaction to keep the cache small.
# FIXME: get rid of this when we have room_stats # FIXME: get rid of this when we have room_stats
# Note, rejected events will have a null membership field, so counts = self._get_member_counts_txn(txn, room_id)
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
txn.execute(sql, (room_id,))
res: Dict[str, MemberSummary] = {} res: Dict[str, MemberSummary] = {}
for count, membership in txn: for membership, count in counts.items():
res.setdefault(membership, MemberSummary([], count)) res.setdefault(membership, MemberSummary([], count))
# Order by membership (joins -> invites -> leave (former insiders) -> # Order by membership (joins -> invites -> leave (former insiders) ->
@ -369,6 +361,31 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
"get_room_summary", _get_room_summary_txn "get_room_summary", _get_room_summary_txn
) )
@cached()
async def get_member_counts(self, room_id: str) -> Mapping[str, int]:
"""Get a mapping of number of users by membership"""
return await self.db_pool.runInteraction(
"get_member_counts", self._get_member_counts_txn, room_id
)
def _get_member_counts_txn(
self, txn: LoggingTransaction, room_id: str
) -> Dict[str, int]:
"""Get a mapping of number of users by membership"""
# Note, rejected events will have a null membership field, so
# we we manually filter them out.
sql = """
SELECT count(*), membership FROM current_state_events
WHERE type = 'm.room.member' AND room_id = ?
AND membership IS NOT NULL
GROUP BY membership
"""
txn.execute(sql, (room_id,))
return {membership: count for count, membership in txn}
@cached() @cached()
async def get_number_joined_users_in_room(self, room_id: str) -> int: async def get_number_joined_users_in_room(self, room_id: str) -> int:
return await self.db_pool.simple_select_one_onecol( return await self.db_pool.simple_select_one_onecol(

View file

@ -736,6 +736,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx"
EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index"
DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events" DELETE_CURRENT_STATE_UPDATE_NAME = "delete_old_current_state_events"
MEMBERS_CURRENT_STATE_UPDATE_NAME = "current_state_events_members_room_index"
def __init__( def __init__(
self, self,
@ -764,6 +765,13 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
self.DELETE_CURRENT_STATE_UPDATE_NAME, self.DELETE_CURRENT_STATE_UPDATE_NAME,
self._background_remove_left_rooms, self._background_remove_left_rooms,
) )
self.db_pool.updates.register_background_index_update(
self.MEMBERS_CURRENT_STATE_UPDATE_NAME,
index_name="current_state_events_members_room_index",
table="current_state_events",
columns=["room_id", "membership"],
where_clause="type='m.room.member'",
)
async def _background_remove_left_rooms( async def _background_remove_left_rooms(
self, progress: JsonDict, batch_size: int self, progress: JsonDict, batch_size: int

View file

@ -0,0 +1,19 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2024 New Vector, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.
-- Add a background updates to add a new index:
-- `current_state_events(room_id, membership) WHERE type = 'm.room.member'
-- This makes counting membership in rooms (for syncs) much faster
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(8701, 'current_state_events_members_room_index', '{}');

View file

@ -371,14 +371,17 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://UPDATED_DUMMY_MEDIA_ID", "mxc://UPDATED_DUMMY_MEDIA_ID",
response_body["rooms"][room_id1], response_body["rooms"][room_id1],
) )
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"], # We don't give extra room information to invitees
1, self.assertNotIn(
"joined_count",
response_body["rooms"][room_id1],
) )
self.assertEqual( self.assertNotIn(
response_body["rooms"][room_id1]["invited_count"], "invited_count",
1, response_body["rooms"][room_id1],
) )
self.assertIsNone( self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"), response_body["rooms"][room_id1].get("is_dm"),
) )
@ -450,15 +453,16 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
"mxc://DUMMY_MEDIA_ID", "mxc://DUMMY_MEDIA_ID",
response_body["rooms"][room_id1], response_body["rooms"][room_id1],
) )
self.assertEqual(
response_body["rooms"][room_id1]["joined_count"], # FIXME: We possibly want to return joined and invited counts for rooms
# FIXME: The actual number should be "1" (user2) but we currently don't # you're banned form
# support this for rooms where the user has left/been banned. self.assertNotIn(
0, "joined_count",
response_body["rooms"][room_id1],
) )
self.assertEqual( self.assertNotIn(
response_body["rooms"][room_id1]["invited_count"], "invited_count",
0, response_body["rooms"][room_id1],
) )
self.assertIsNone( self.assertIsNone(
response_body["rooms"][room_id1].get("is_dm"), response_body["rooms"][room_id1].get("is_dm"),
@ -692,19 +696,15 @@ class SlidingSyncRoomsMetaTestCase(SlidingSyncBase):
[], [],
) )
self.assertEqual( # FIXME: We possibly want to return joined and invited counts for rooms
response_body["rooms"][room_id1]["joined_count"], # you're banned form
# FIXME: The actual number should be "1" (user2) but we currently don't self.assertNotIn(
# support this for rooms where the user has left/been banned. "joined_count",
0, response_body["rooms"][room_id1],
) )
self.assertEqual( self.assertNotIn(
response_body["rooms"][room_id1]["invited_count"], "invited_count",
# We shouldn't see user5 since they were invited after user1 was banned. response_body["rooms"][room_id1],
#
# FIXME: The actual number should be "1" (user3) but we currently don't
# support this for rooms where the user has left/been banned.
0,
) )
def test_rooms_meta_heroes_incremental_sync_no_change(self) -> None: def test_rooms_meta_heroes_incremental_sync_no_change(self) -> None: