mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-25 19:15:51 +03:00
Speed up user directory rebuild for users some more... (#15665)
This commit is contained in:
parent
1f55c04cbc
commit
c7e9c1d5ae
2 changed files with 117 additions and 78 deletions
1
changelog.d/15665.misc
Normal file
1
changelog.d/15665.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Speed up rebuilding of the user directory for local users.
|
|
@ -17,6 +17,7 @@ import re
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from typing import (
|
from typing import (
|
||||||
TYPE_CHECKING,
|
TYPE_CHECKING,
|
||||||
|
Collection,
|
||||||
Iterable,
|
Iterable,
|
||||||
List,
|
List,
|
||||||
Mapping,
|
Mapping,
|
||||||
|
@ -45,7 +46,7 @@ from synapse.util.stringutils import non_null_str_or_none
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from synapse.server import HomeServer
|
from synapse.server import HomeServer
|
||||||
|
|
||||||
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
|
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, UserTypes
|
||||||
from synapse.storage.database import (
|
from synapse.storage.database import (
|
||||||
DatabasePool,
|
DatabasePool,
|
||||||
LoggingDatabaseConnection,
|
LoggingDatabaseConnection,
|
||||||
|
@ -356,8 +357,25 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
Add all local users to the user directory.
|
Add all local users to the user directory.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
|
def _populate_user_directory_process_users_txn(
|
||||||
sql = "SELECT user_id FROM %s LIMIT %s" % (
|
txn: LoggingTransaction,
|
||||||
|
) -> Optional[int]:
|
||||||
|
if self.database_engine.supports_returning:
|
||||||
|
# Note: we use an ORDER BY in the SELECT to force usage of an
|
||||||
|
# index. Otherwise, postgres does a sequential scan that is
|
||||||
|
# surprisingly slow (I think due to the fact it will read/skip
|
||||||
|
# over lots of already deleted rows).
|
||||||
|
sql = f"""
|
||||||
|
DELETE FROM {TEMP_TABLE + "_users"}
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT user_id FROM {TEMP_TABLE + "_users"} ORDER BY user_id LIMIT ?
|
||||||
|
)
|
||||||
|
RETURNING user_id
|
||||||
|
"""
|
||||||
|
txn.execute(sql, (batch_size,))
|
||||||
|
user_result = cast(List[Tuple[str]], txn.fetchall())
|
||||||
|
else:
|
||||||
|
sql = "SELECT user_id FROM %s ORDER BY user_id LIMIT %s" % (
|
||||||
TEMP_TABLE + "_users",
|
TEMP_TABLE + "_users",
|
||||||
str(batch_size),
|
str(batch_size),
|
||||||
)
|
)
|
||||||
|
@ -378,34 +396,24 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
assert count_result is not None
|
assert count_result is not None
|
||||||
progress["remaining"] = count_result[0]
|
progress["remaining"] = count_result[0]
|
||||||
|
|
||||||
return users_to_work_on
|
|
||||||
|
|
||||||
users_to_work_on = await self.db_pool.runInteraction(
|
|
||||||
"populate_user_directory_temp_read", _get_next_batch
|
|
||||||
)
|
|
||||||
|
|
||||||
# No more users -- complete the transaction.
|
|
||||||
if not users_to_work_on:
|
if not users_to_work_on:
|
||||||
await self.db_pool.updates._end_background_update(
|
return None
|
||||||
"populate_user_directory_process_users"
|
|
||||||
)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Processing the next %d users of %d remaining"
|
"Processing the next %d users of %d remaining",
|
||||||
% (len(users_to_work_on), progress["remaining"])
|
len(users_to_work_on),
|
||||||
|
progress["remaining"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# First filter down to users we want to insert into the user directory.
|
# First filter down to users we want to insert into the user directory.
|
||||||
users_to_insert = [
|
users_to_insert = self._filter_local_users_for_dir_txn(
|
||||||
user_id
|
txn, users_to_work_on
|
||||||
for user_id in users_to_work_on
|
)
|
||||||
if await self.should_include_local_user_in_dir(user_id)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Next fetch their profiles. Note that the `user_id` here is the
|
# Next fetch their profiles. Note that the `user_id` here is the
|
||||||
# *localpart*, and that not all users have profiles.
|
# *localpart*, and that not all users have profiles.
|
||||||
profile_rows = await self.db_pool.simple_select_many_batch(
|
profile_rows = self.db_pool.simple_select_many_txn(
|
||||||
|
txn,
|
||||||
table="profiles",
|
table="profiles",
|
||||||
column="user_id",
|
column="user_id",
|
||||||
iterable=[get_localpart_from_id(u) for u in users_to_insert],
|
iterable=[get_localpart_from_id(u) for u in users_to_insert],
|
||||||
|
@ -415,7 +423,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
"avatar_url",
|
"avatar_url",
|
||||||
),
|
),
|
||||||
keyvalues={},
|
keyvalues={},
|
||||||
desc="populate_user_directory_process_users_get_profiles",
|
|
||||||
)
|
)
|
||||||
profiles = {
|
profiles = {
|
||||||
f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
|
f"@{row['user_id']}:{self.server_name}": _UserDirProfile(
|
||||||
|
@ -432,31 +439,38 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
]
|
]
|
||||||
|
|
||||||
# Actually insert the users with their profiles into the directory.
|
# Actually insert the users with their profiles into the directory.
|
||||||
await self.db_pool.runInteraction(
|
self._update_profiles_in_user_dir_txn(txn, profiles_to_insert)
|
||||||
"populate_user_directory_process_users_insertion",
|
|
||||||
self._update_profiles_in_user_dir_txn,
|
|
||||||
profiles_to_insert,
|
|
||||||
)
|
|
||||||
|
|
||||||
# We've finished processing the users. Delete it from the table.
|
# We've finished processing the users. Delete it from the table, if
|
||||||
await self.db_pool.simple_delete_many(
|
# we haven't already.
|
||||||
|
if not self.database_engine.supports_returning:
|
||||||
|
self.db_pool.simple_delete_many_txn(
|
||||||
|
txn,
|
||||||
table=TEMP_TABLE + "_users",
|
table=TEMP_TABLE + "_users",
|
||||||
column="user_id",
|
column="user_id",
|
||||||
iterable=users_to_work_on,
|
values=users_to_work_on,
|
||||||
keyvalues={},
|
keyvalues={},
|
||||||
desc="populate_user_directory_process_users_delete",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Update the remaining counter.
|
# Update the remaining counter.
|
||||||
progress["remaining"] -= len(users_to_work_on)
|
progress["remaining"] -= len(users_to_work_on)
|
||||||
await self.db_pool.runInteraction(
|
self.db_pool.updates._background_update_progress_txn(
|
||||||
"populate_user_directory",
|
txn, "populate_user_directory_process_users", progress
|
||||||
self.db_pool.updates._background_update_progress_txn,
|
)
|
||||||
"populate_user_directory_process_users",
|
return len(users_to_work_on)
|
||||||
progress,
|
|
||||||
|
processed_count = await self.db_pool.runInteraction(
|
||||||
|
"populate_user_directory_temp", _populate_user_directory_process_users_txn
|
||||||
)
|
)
|
||||||
|
|
||||||
return len(users_to_work_on)
|
# No more users -- complete the transaction.
|
||||||
|
if not processed_count:
|
||||||
|
await self.db_pool.updates._end_background_update(
|
||||||
|
"populate_user_directory_process_users"
|
||||||
|
)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
return processed_count
|
||||||
|
|
||||||
async def should_include_local_user_in_dir(self, user: str) -> bool:
|
async def should_include_local_user_in_dir(self, user: str) -> bool:
|
||||||
"""Certain classes of local user are omitted from the user directory.
|
"""Certain classes of local user are omitted from the user directory.
|
||||||
|
@ -494,6 +508,30 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _filter_local_users_for_dir_txn(
|
||||||
|
self, txn: LoggingTransaction, users: Collection[str]
|
||||||
|
) -> Collection[str]:
|
||||||
|
"""A batched version of `should_include_local_user_in_dir`"""
|
||||||
|
users = [
|
||||||
|
user
|
||||||
|
for user in users
|
||||||
|
if self.get_app_service_by_user_id(user) is None # type: ignore[attr-defined]
|
||||||
|
and not self.get_if_app_services_interested_in_user(user) # type: ignore[attr-defined]
|
||||||
|
]
|
||||||
|
|
||||||
|
rows = self.db_pool.simple_select_many_txn(
|
||||||
|
txn,
|
||||||
|
table="users",
|
||||||
|
column="name",
|
||||||
|
iterable=users,
|
||||||
|
keyvalues={
|
||||||
|
"deactivated": 0,
|
||||||
|
},
|
||||||
|
retcols=("name", "user_type"),
|
||||||
|
)
|
||||||
|
|
||||||
|
return [row["name"] for row in rows if row["user_type"] != UserTypes.SUPPORT]
|
||||||
|
|
||||||
async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
|
async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
|
||||||
"""Check if the room is either world_readable or publically joinable"""
|
"""Check if the room is either world_readable or publically joinable"""
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue