Merge pull request #3112 from matrix-org/dbkr/profile_replication

Option to replicate user profiles to another server
This commit is contained in:
David Baker 2018-04-26 10:22:28 +01:00 committed by GitHub
commit e2adb360eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 211 additions and 12 deletions

View file

@ -49,6 +49,10 @@ class RegistrationConfig(Config):
self.auto_join_rooms = config.get("auto_join_rooms", []) self.auto_join_rooms = config.get("auto_join_rooms", [])
self.replicate_user_profiles_to = config.get("replicate_user_profiles_to", [])
if not isinstance(self.replicate_user_profiles_to, list):
self.replicate_user_profiles_to = [self.replicate_user_profiles_to, ]
def default_config(self, **kwargs): def default_config(self, **kwargs):
registration_shared_secret = random_string_with_symbols(50) registration_shared_secret = random_string_with_symbols(50)
@ -108,6 +112,12 @@ class RegistrationConfig(Config):
- vector.im - vector.im
- riot.im - riot.im
# If enabled, user IDs, display names and avatar URLs will be replicated
# to this server whenever they change.
# This is an experimental API currently implemented by sydent to support
# cross-homeserver user directories.
# replicate_user_profiles_to: example.com
# Users who register on this homeserver will automatically be joined # Users who register on this homeserver will automatically be joined
# to these rooms # to these rooms
#auto_join_rooms: #auto_join_rooms:

View file

@ -15,12 +15,15 @@
import logging import logging
from twisted.internet import defer from twisted.internet import defer, reactor
from synapse.api.errors import SynapseError, AuthError, CodeMessageException from synapse.api.errors import SynapseError, AuthError, CodeMessageException
from synapse.util.logcontext import run_in_background
from synapse.types import UserID, get_domain_from_id from synapse.types import UserID, get_domain_from_id
from ._base import BaseHandler from ._base import BaseHandler
from signedjson.sign import sign_json
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -28,6 +31,8 @@ class ProfileHandler(BaseHandler):
PROFILE_UPDATE_MS = 60 * 1000 PROFILE_UPDATE_MS = 60 * 1000
PROFILE_UPDATE_EVERY_MS = 24 * 60 * 60 * 1000 PROFILE_UPDATE_EVERY_MS = 24 * 60 * 60 * 1000
PROFILE_REPLICATE_INTERVAL = 2 * 60 * 1000
def __init__(self, hs): def __init__(self, hs):
super(ProfileHandler, self).__init__(hs) super(ProfileHandler, self).__init__(hs)
@ -38,11 +43,82 @@ class ProfileHandler(BaseHandler):
self.user_directory_handler = hs.get_user_directory_handler() self.user_directory_handler = hs.get_user_directory_handler()
self.http_client = hs.get_simple_http_client()
if hs.config.worker_app is None: if hs.config.worker_app is None:
self.clock.looping_call( self.clock.looping_call(
self._update_remote_profile_cache, self.PROFILE_UPDATE_MS, self._update_remote_profile_cache, self.PROFILE_UPDATE_MS,
) )
if len(self.hs.config.replicate_user_profiles_to) > 0:
reactor.callWhenRunning(self._assign_profile_replication_batches)
reactor.callWhenRunning(self._replicate_profiles)
# Add a looping call to replicate_profiles: this handles retries
# if the replication is unsuccessful when the user updated their
# profile.
self.clock.looping_call(
self._replicate_profiles, self.PROFILE_REPLICATE_INTERVAL
)
@defer.inlineCallbacks
def _assign_profile_replication_batches(self):
    """Allocate replication batch numbers to profiles that lack one.

    Repeatedly asks the store to assign a batch, summing the row counts it
    reports, and stops as soon as a call reports that nothing was assigned.
    """
    logger.info("Assigning profile batch numbers...")

    assigned_so_far = 0
    just_assigned = yield self.store.assign_profile_batch()
    while just_assigned != 0:
        assigned_so_far += just_assigned
        just_assigned = yield self.store.assign_profile_batch()

    logger.info("Assigned %d profile batch numbers", assigned_so_far)
@defer.inlineCallbacks
def _replicate_profiles(self):
    """Push any profile batches not yet replicated to each configured host.

    For every host in ``replicate_user_profiles_to``, sends each batch after
    the last one recorded as synced for that host, up to the highest batch
    number currently present in the profiles table. If sending to a host
    fails, the error is logged and the remaining batches for that host are
    left for a later run; the other hosts are still attempted.
    """
    latest_batch = yield self.store.get_latest_profile_replication_batch_number()
    if latest_batch is None:
        # No profile carries a batch number yet, so there is nothing to send.
        # Without this guard `latest_batch + 1` raises a TypeError which is
        # then logged as a replication failure for every host on every run.
        return

    host_batches = yield self.store.get_replication_hosts()

    for repl_host in self.hs.config.replicate_user_profiles_to:
        # A host with no recorded status has synced nothing: start at batch 0.
        first_unsent = host_batches.get(repl_host, -1) + 1
        try:
            for batchnum in range(first_unsent, latest_batch + 1):
                yield self._replicate_host_profile_batch(repl_host, batchnum)
        except Exception:
            logger.exception(
                "Exception while replicating to %s: aborting for now", repl_host,
            )
@defer.inlineCallbacks
def _replicate_host_profile_batch(self, host, batchnum):
    """Send a single profile batch to a single replication target.

    Loads the profiles stored under ``batchnum``, builds a JSON body mapping
    each full user ID to its display name and avatar URL, signs it with this
    server's first signing key and POSTs it to ``host``. On success the batch
    number is recorded as the last one synced for that host.

    Any exception from the POST or from recording the result is logged and
    re-raised to the caller.
    """
    logger.info("Replicating profile batch %d to %s", batchnum, host)

    rows = yield self.store.get_profile_batch(batchnum)

    profiles_by_user = {}
    for row in rows:
        # The store keeps localparts; qualify them with our server name.
        full_user_id = UserID(row["user_id"], self.hs.hostname).to_string()
        profiles_by_user[full_user_id] = {
            "display_name": row["displayname"],
            "avatar_url": row["avatar_url"],
        }

    target_url = "https://%s/_matrix/federation/v1/replicate_profiles" % (host,)
    unsigned_body = {
        "batchnum": batchnum,
        "batch": profiles_by_user,
        "origin_server": self.hs.hostname,
    }
    signing_key = self.hs.config.signing_key[0]
    signed_body = sign_json(unsigned_body, self.hs.hostname, signing_key)

    try:
        yield self.http_client.post_json_get_json(target_url, signed_body)
        yield self.store.update_replication_batch_for_host(host, batchnum)
        logger.info("Sucessfully replicated profile batch %d to %s", batchnum, host)
    except Exception:
        # This will get retried when the looping call next comes around
        logger.exception("Failed to replicate profile batch %d to %s", batchnum, host)
        raise
@defer.inlineCallbacks @defer.inlineCallbacks
def get_profile(self, user_id): def get_profile(self, user_id):
target_user = UserID.from_string(user_id) target_user = UserID.from_string(user_id)
@ -140,8 +216,14 @@ class ProfileHandler(BaseHandler):
if new_displayname == '': if new_displayname == '':
new_displayname = None new_displayname = None
if len(self.hs.config.replicate_user_profiles_to) > 0:
cur_batchnum = yield self.store.get_latest_profile_replication_batch_number()
new_batchnum = 0 if cur_batchnum is None else cur_batchnum + 1
else:
new_batchnum = None
yield self.store.set_profile_displayname( yield self.store.set_profile_displayname(
target_user.localpart, new_displayname target_user.localpart, new_displayname, new_batchnum
) )
if self.hs.config.user_directory_search_all_users: if self.hs.config.user_directory_search_all_users:
@ -152,6 +234,9 @@ class ProfileHandler(BaseHandler):
yield self._update_join_states(requester, target_user) yield self._update_join_states(requester, target_user)
# start a profile replication push
run_in_background(self._replicate_profiles)
@defer.inlineCallbacks @defer.inlineCallbacks
def get_avatar_url(self, target_user): def get_avatar_url(self, target_user):
if self.hs.is_mine(target_user): if self.hs.is_mine(target_user):
@ -190,8 +275,14 @@ class ProfileHandler(BaseHandler):
if not by_admin and target_user != requester.user: if not by_admin and target_user != requester.user:
raise AuthError(400, "Cannot set another user's avatar_url") raise AuthError(400, "Cannot set another user's avatar_url")
if len(self.hs.config.replicate_user_profiles_to) > 0:
cur_batchnum = yield self.store.get_latest_profile_replication_batch_number()
new_batchnum = 0 if cur_batchnum is None else cur_batchnum + 1
else:
new_batchnum = None
yield self.store.set_profile_avatar_url( yield self.store.set_profile_avatar_url(
target_user.localpart, new_avatar_url target_user.localpart, new_avatar_url, new_batchnum,
) )
if self.hs.config.user_directory_search_all_users: if self.hs.config.user_directory_search_all_users:
@ -202,6 +293,9 @@ class ProfileHandler(BaseHandler):
yield self._update_join_states(requester, target_user) yield self._update_join_states(requester, target_user)
# start a profile replication push
run_in_background(self._replicate_profiles)
@defer.inlineCallbacks @defer.inlineCallbacks
def on_profile_query(self, args): def on_profile_query(self, args):
user = UserID.from_string(args["user_id"]) user = UserID.from_string(args["user_id"])

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd # Copyright 2014-2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -20,6 +21,8 @@ from synapse.api.errors import StoreError
from ._base import SQLBaseStore from ._base import SQLBaseStore
BATCH_SIZE = 100
class ProfileWorkerStore(SQLBaseStore): class ProfileWorkerStore(SQLBaseStore):
@defer.inlineCallbacks @defer.inlineCallbacks
@ -62,6 +65,55 @@ class ProfileWorkerStore(SQLBaseStore):
desc="get_profile_avatar_url", desc="get_profile_avatar_url",
) )
def get_latest_profile_replication_batch_number(self):
    """Fetch the highest batch number currently stored on any profile.

    Returns:
        Whatever ``runInteraction`` yields for the query: the ``maxbatch``
        value of the single row produced by ``SELECT MAX(batch)``.
    """
    def _select_max_batch(txn):
        txn.execute("SELECT MAX(batch) as maxbatch FROM profiles")
        first_row = self.cursor_to_dict(txn)[0]
        return first_row['maxbatch']

    return self.runInteraction(
        "get_latest_profile_replication_batch_number", _select_max_batch,
    )
def get_profile_batch(self, batchnum):
    """Select the profile rows whose ``batch`` column equals ``batchnum``.

    Args:
        batchnum: the batch number to match.

    Returns:
        The result of ``_simple_select_list`` restricted to the ``user_id``,
        ``displayname`` and ``avatar_url`` columns.
    """
    wanted_columns = ("user_id", "displayname", "avatar_url")
    match_on = {"batch": batchnum}
    return self._simple_select_list(
        table="profiles",
        keyvalues=match_on,
        retcols=wanted_columns,
        desc="get_profile_batch",
    )
def assign_profile_batch(self):
    """Put up to BATCH_SIZE batch-less profiles into one new batch.

    The new batch number is one more than the current maximum, or 0 when no
    profile has a batch yet.

    Returns:
        The result of ``runInteraction``: the cursor's ``rowcount`` after the
        UPDATE, i.e. how many profiles were assigned.
    """
    def f(txn):
        # COALESCE rather than IFNULL: IFNULL is not available on PostgreSQL,
        # whereas COALESCE is standard SQL and works on SQLite too.
        sql = (
            "UPDATE profiles SET batch = "
            "(SELECT COALESCE(MAX(batch), -1) + 1 FROM profiles) "
            "WHERE user_id in ("
            "    SELECT user_id FROM profiles WHERE batch is NULL limit ?"
            ")"
        )
        txn.execute(sql, (BATCH_SIZE,))
        return txn.rowcount

    return self.runInteraction("assign_profile_batch", f)
def get_replication_hosts(self):
    """Read the replication status table into a dict.

    Returns:
        The result of ``runInteraction``: a dict mapping each ``host`` in
        ``profile_replication_status`` to its ``last_synced_batch``.
    """
    def _fetch_host_status(txn):
        txn.execute("SELECT host, last_synced_batch FROM profile_replication_status")
        return dict(
            (row['host'], row['last_synced_batch'])
            for row in self.cursor_to_dict(txn)
        )

    return self.runInteraction("get_replication_hosts", _fetch_host_status)
def update_replication_batch_for_host(self, host, last_synced_batch):
    """Record the last batch synced to a replication target.

    Args:
        host: the replication target the batch was sent to.
        last_synced_batch: the batch number to store for that host.

    Returns:
        The result of ``_simple_upsert`` on ``profile_replication_status``,
        keyed on ``host``.
    """
    row_key = {"host": host}
    new_values = {"last_synced_batch": last_synced_batch}
    return self._simple_upsert(
        table="profile_replication_status",
        keyvalues=row_key,
        values=new_values,
        desc="update_replication_batch_for_host",
    )
def get_from_remote_profile_cache(self, user_id): def get_from_remote_profile_cache(self, user_id):
return self._simple_select_one( return self._simple_select_one(
table="remote_profile_cache", table="remote_profile_cache",
@ -80,19 +132,25 @@ class ProfileStore(ProfileWorkerStore):
desc="create_profile", desc="create_profile",
) )
def set_profile_displayname(self, user_localpart, new_displayname): def set_profile_displayname(self, user_localpart, new_displayname, batchnum):
return self._simple_update_one( return self._simple_update_one(
table="profiles", table="profiles",
keyvalues={"user_id": user_localpart}, keyvalues={"user_id": user_localpart},
updatevalues={"displayname": new_displayname}, updatevalues={
"displayname": new_displayname,
"batch": batchnum,
},
desc="set_profile_displayname", desc="set_profile_displayname",
) )
def set_profile_avatar_url(self, user_localpart, new_avatar_url): def set_profile_avatar_url(self, user_localpart, new_avatar_url, batchnum):
return self._simple_update_one( return self._simple_update_one(
table="profiles", table="profiles",
keyvalues={"user_id": user_localpart}, keyvalues={"user_id": user_localpart},
updatevalues={"avatar_url": new_avatar_url}, updatevalues={
"avatar_url": new_avatar_url,
"batch": batchnum,
},
desc="set_profile_avatar_url", desc="set_profile_avatar_url",
) )

View file

@ -0,0 +1,36 @@
/* Copyright 2018 New Vector Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Add a batch number to track changes to profiles and the
 * order they're made in so we can replicate user profiles
 * to other hosts as they change
 */
ALTER TABLE profiles ADD COLUMN batch BIGINT DEFAULT NULL;

/*
 * Index on the batch number so we can get profiles
 * by their batch
 */
CREATE INDEX profiles_batch_idx ON profiles(batch);

/*
 * A table to track what batch of user profiles has been
 * synced to what profile replication target.
 */
CREATE TABLE profile_replication_status (
    host TEXT NOT NULL,
    last_synced_batch BIGINT NOT NULL
);

/*
 * Status rows are looked up and upserted by host, so enforce
 * exactly one row per replication target.
 */
CREATE UNIQUE INDEX profile_replication_status_idx ON profile_replication_status(host);

View file

@ -75,7 +75,7 @@ class ProfileTestCase(unittest.TestCase):
@defer.inlineCallbacks @defer.inlineCallbacks
def test_get_my_name(self): def test_get_my_name(self):
yield self.store.set_profile_displayname( yield self.store.set_profile_displayname(
self.frank.localpart, "Frank" self.frank.localpart, "Frank", 1,
) )
displayname = yield self.handler.get_displayname(self.frank) displayname = yield self.handler.get_displayname(self.frank)
@ -124,7 +124,7 @@ class ProfileTestCase(unittest.TestCase):
@defer.inlineCallbacks @defer.inlineCallbacks
def test_incoming_fed_query(self): def test_incoming_fed_query(self):
yield self.store.create_profile("caroline") yield self.store.create_profile("caroline")
yield self.store.set_profile_displayname("caroline", "Caroline") yield self.store.set_profile_displayname("caroline", "Caroline", 1)
response = yield self.query_handlers["profile"]( response = yield self.query_handlers["profile"](
{"user_id": "@caroline:test", "field": "displayname"} {"user_id": "@caroline:test", "field": "displayname"}
@ -135,7 +135,7 @@ class ProfileTestCase(unittest.TestCase):
@defer.inlineCallbacks @defer.inlineCallbacks
def test_get_my_avatar(self): def test_get_my_avatar(self):
yield self.store.set_profile_avatar_url( yield self.store.set_profile_avatar_url(
self.frank.localpart, "http://my.server/me.png" self.frank.localpart, "http://my.server/me.png", 1,
) )
avatar_url = yield self.handler.get_avatar_url(self.frank) avatar_url = yield self.handler.get_avatar_url(self.frank)

View file

@ -40,7 +40,7 @@ class ProfileStoreTestCase(unittest.TestCase):
) )
yield self.store.set_profile_displayname( yield self.store.set_profile_displayname(
self.u_frank.localpart, "Frank" self.u_frank.localpart, "Frank", 1,
) )
self.assertEquals( self.assertEquals(
@ -55,7 +55,7 @@ class ProfileStoreTestCase(unittest.TestCase):
) )
yield self.store.set_profile_avatar_url( yield self.store.set_profile_avatar_url(
self.u_frank.localpart, "http://my.site/here" self.u_frank.localpart, "http://my.site/here", 1,
) )
self.assertEquals( self.assertEquals(

View file

@ -60,6 +60,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs):
config.block_non_admin_invites = False config.block_non_admin_invites = False
config.federation_domain_whitelist = None config.federation_domain_whitelist = None
config.user_directory_search_all_users = False config.user_directory_search_all_users = False
config.replicate_user_profiles_to = []
# disable user directory updates, because they get done in the # disable user directory updates, because they get done in the
# background, which upsets the test runner. # background, which upsets the test runner.