From 6f05de0e5ecd9e3f4d418232f0af7d1aef087426 Mon Sep 17 00:00:00 2001 From: Richard von Seck Date: Thu, 16 Nov 2017 15:59:38 +0100 Subject: [PATCH 001/348] synapse/config/password_auth_providers: Fixed bracket typo Signed-off-by: Richard von Seck --- synapse/config/password_auth_providers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index e9828fac17..6602c5b4c7 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -29,10 +29,10 @@ class PasswordAuthProviderConfig(Config): # param. ldap_config = config.get("ldap_config", {}) if ldap_config.get("enabled", False): - providers.append[{ + providers.append({ 'module': LDAP_PROVIDER, 'config': ldap_config, - }] + }) providers.extend(config.get("password_providers", [])) for provider in providers: From 83d8d4d8cd699d9596e25c6167dad130f3df9ac3 Mon Sep 17 00:00:00 2001 From: Oliver Kurz Date: Mon, 20 Nov 2017 11:11:53 +0100 Subject: [PATCH 002/348] Allow use of higher versions of saml2 The package was pinned to <4.0 with 07cf96eb because "from saml2 import config" did not work. This seems to have been fixed in the mean time in the saml2 package and therefore should not stop to use a more recent version. Signed-off-by: Oliver Kurz --- synapse/python_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 7052333c19..97b631e60d 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -36,7 +36,7 @@ REQUIREMENTS = { "pydenticon": ["pydenticon"], "ujson": ["ujson"], "blist": ["blist"], - "pysaml2>=3.0.0,<4.0.0": ["saml2>=3.0.0,<4.0.0"], + "pysaml2>=3.0.0": ["saml2>=3.0.0"], "pymacaroons-pynacl": ["pymacaroons"], "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], From 3e59143ba8d62b83f774e1c67a0aa46ce3ecd3ab Mon Sep 17 00:00:00 2001 From: Willem Mulder Date: Thu, 18 May 2017 15:35:36 +0200 Subject: [PATCH 003/348] Adapt the default config to bind on IPv6. Most deployments are on Linux (or Mac OS), so this would actually bind on both IPv4 and IPv6. Resolves #1886. Signed-off-by: Willem Mulder --- synapse/config/server.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index edb90a1348..9035fbf22d 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -220,13 +220,14 @@ class ServerConfig(Config): port: %(bind_port)s # Local addresses to listen on. - # This will listen on all IPv4 addresses by default. + # On Linux and Mac OS, this will listen on all IPv4 and IPv6 + # addresses by default. For most other OSes, this will only listen + # on IPv6. bind_addresses: - - '0.0.0.0' - # Uncomment to listen on all IPv6 interfaces - # N.B: On at least Linux this will also listen on all IPv4 - # addresses, so you will need to comment out the line above. - # - '::' + - '::' + # For systems other than Linux or Mac OS, uncomment the next line + # to also listen on IPv4. + #- '0.0.0.0' # This is a 'http' listener, allows us to specify 'resources'. type: http @@ -264,7 +265,7 @@ class ServerConfig(Config): # For when matrix traffic passes through loadbalancer that unwraps TLS. 
- port: %(unsecure_port)s tls: false - bind_addresses: ['0.0.0.0'] + bind_addresses: ['::'] type: http x_forwarded: false @@ -278,7 +279,7 @@ class ServerConfig(Config): # Turn on the twisted ssh manhole service on localhost on the given # port. # - port: 9000 - # bind_address: 127.0.0.1 + # bind_addresses: ['::1', '127.0.0.1'] # type: manhole """ % locals() From 37d1a90025c7dd3d87487f8cc0e6794e29becb93 Mon Sep 17 00:00:00 2001 From: Silke Hofstra Date: Sat, 2 Sep 2017 17:26:40 +0200 Subject: [PATCH 004/348] Allow binds to both :: and 0.0.0.0 Binding on 0.0.0.0 when :: is specified in the bind_addresses is now allowed. This causes a warning explaining the behaviour. Configuration changed to match. See #2232 Signed-off-by: Silke Hofstra --- synapse/app/homeserver.py | 95 ++++++++++++++++++++++++--------------- synapse/config/server.py | 8 ++-- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 6b8875afb4..82a4e18c67 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -58,6 +58,7 @@ from twisted.internet import defer, reactor from twisted.web.resource import EncodingResourceWrapper, Resource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File +from twisted.internet import error logger = logging.getLogger("synapse.app.homeserver") @@ -131,29 +132,36 @@ class SynapseHomeServer(HomeServer): if tls: for address in bind_addresses: - reactor.listenSSL( - port, - SynapseSite( - "synapse.access.https.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - self.tls_server_context_factory, - interface=address - ) + try: + reactor.listenSSL( + port, + SynapseSite( + "synapse.access.https.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ), + self.tls_server_context_factory, + interface=address + ) + except error.CannotListenError as e: + check_bind_error(e, address, bind_addresses) + else: for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address - ) + try: + reactor.listenTCP( + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ), + interface=address + ) + except error.CannotListenError as e: + check_bind_error(e, address, bind_addresses) logger.info("Synapse now listening on port %d", port) def _configure_named_resource(self, name, compress=False): @@ -232,25 +240,31 @@ class SynapseHomeServer(HomeServer): bind_addresses = listener["bind_addresses"] for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address - ) + try: + reactor.listenTCP( + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ), + interface=address + ) + except error.CannotListenError as e: + check_bind_error(e, address, bind_addresses) elif listener["type"] == "replication": bind_addresses = listener["bind_addresses"] for address in bind_addresses: - factory = ReplicationStreamProtocolFactory(self) - server_listener = reactor.listenTCP( - listener["port"], factory, interface=address - ) - reactor.addSystemEventTrigger( - "before", "shutdown", server_listener.stopListening, - ) + try: + factory = ReplicationStreamProtocolFactory(self) + server_listener = reactor.listenTCP( + listener["port"], factory, 
interface=address + ) + reactor.addSystemEventTrigger( + "before", "shutdown", server_listener.stopListening, + ) + except error.CannotListenError as e: + check_bind_error(e, address, bind_addresses) else: logger.warn("Unrecognized listener type: %s", listener["type"]) @@ -284,6 +298,13 @@ class SynapseHomeServer(HomeServer): return db_conn +def check_bind_error(e, address, bind_addresses): + if address == '0.0.0.0' and '::' in bind_addresses: + logger.warn('Failed to listen on 0.0.0.0, continuing because listening on [::]') + else: + raise e + + def setup(config_options): """ Args: diff --git a/synapse/config/server.py b/synapse/config/server.py index 9035fbf22d..436dd8a6fe 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -220,14 +220,12 @@ class ServerConfig(Config): port: %(bind_port)s # Local addresses to listen on. - # On Linux and Mac OS, this will listen on all IPv4 and IPv6 + # On Linux and Mac OS, `::` will listen on all IPv4 and IPv6 # addresses by default. For most other OSes, this will only listen # on IPv6. bind_addresses: - '::' - # For systems other than Linux or Mac OS, uncomment the next line - # to also listen on IPv4. - #- '0.0.0.0' + - '0.0.0.0' # This is a 'http' listener, allows us to specify 'resources'. type: http @@ -265,7 +263,7 @@ class ServerConfig(Config): # For when matrix traffic passes through loadbalancer that unwraps TLS. - port: %(unsecure_port)s tls: false - bind_addresses: ['::'] + bind_addresses: ['::', '0.0.0.0'] type: http x_forwarded: false From ed48ecc58cc1e26cacc5bf5a4b7d83c27ba27732 Mon Sep 17 00:00:00 2001 From: Silke Hofstra Date: Wed, 6 Sep 2017 17:48:49 +0200 Subject: [PATCH 005/348] Add methods for listening on multiple addresses Add listen_tcp and listen_ssl which implement Twisted's reactor.listenTCP and reactor.listenSSL for multiple addresses. Signed-off-by: Silke Hofstra --- synapse/app/_base.py | 57 +++++++++++++++++++++++++- synapse/app/homeserver.py | 84 ++++++++++++++++----------------------- 2 files changed, 90 insertions(+), 51 deletions(-) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 9477737759..0f9a57bd4c 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -25,7 +25,7 @@ except Exception: from daemonize import Daemonize from synapse.util import PreserveLoggingContext from synapse.util.rlimit import change_resource_limit -from twisted.internet import reactor +from twisted.internet import error, reactor def start_worker_reactor(appname, config): @@ -120,3 +120,58 @@ def quit_with_error(error_string): sys.stderr.write(" %s\n" % (line.rstrip(),)) sys.stderr.write("*" * line_length + '\n') sys.exit(1) + + +def listen_tcp(logger, bind_addresses, port, factory, backlog=50): + """ + Create a TCP socket for a port and several addresses + """ + for address in bind_addresses: + try: + reactor.listenTCP( + port, + factory, + backlog, + address + ) + except error.CannotListenError as e: + check_bind_error(logger, e, address, bind_addresses) + + +def listen_ssl(logger, bind_addresses, port, factory, context_factory, backlog=50): + """ + Create an SSL socket for a port and several addresses + """ + for address in bind_addresses: + try: + reactor.listenSSL( + port, + factory, + context_factory, + backlog, + address + ) + except error.CannotListenError as e: + check_bind_error(logger, e, address, bind_addresses) + + +def check_bind_error(logger, e, address, bind_addresses): + """ + This method checks an exception occurred while binding on 0.0.0.0. 
+ If :: is specified in the bind addresses a warning is shown. + The exception is still raised otherwise. + + Binding on both 0.0.0.0 and :: causes an exception on Linux and macOS + because :: binds on both IPv4 and IPv6 (as per RFC 3493). + When binding on 0.0.0.0 after :: this can safely be ignored. + + Args: + logger (Logger): Logger used to log the warning. + e (Exception): Exception that was caught. + address (str): Address on which binding was attempted. + bind_addresses (list): Addresses on which the service listens. + """ + if address == '0.0.0.0' and '::' in bind_addresses: + logger.warn('Failed to listen on 0.0.0.0, continuing because listening on [::]') + else: + raise e diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 82a4e18c67..66f206a89d 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -131,37 +131,31 @@ class SynapseHomeServer(HomeServer): root_resource = create_resource_tree(resources, root_resource) if tls: - for address in bind_addresses: - try: - reactor.listenSSL( - port, - SynapseSite( - "synapse.access.https.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - self.tls_server_context_factory, - interface=address - ) - except error.CannotListenError as e: - check_bind_error(e, address, bind_addresses) + _base.listen_ssl( + logger, + bind_addresses, + port, + SynapseSite( + "synapse.access.https.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ), + self.tls_server_context_factory, + ) else: - for address in bind_addresses: - try: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address - ) - except error.CannotListenError as e: - check_bind_error(e, address, bind_addresses) + _base.listen_tcp( + logger, + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ) + ) logger.info("Synapse now listening on port %d", port) def _configure_named_resource(self, name, compress=False): @@ -239,19 +233,16 @@ class SynapseHomeServer(HomeServer): elif listener["type"] == "manhole": bind_addresses = listener["bind_addresses"] - for address in bind_addresses: - try: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address - ) - except error.CannotListenError as e: - check_bind_error(e, address, bind_addresses) + _base.listen_tcp( + logger, + bind_addresses, + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ) + ) elif listener["type"] == "replication": bind_addresses = listener["bind_addresses"] for address in bind_addresses: @@ -264,7 +255,7 @@ class SynapseHomeServer(HomeServer): "before", "shutdown", server_listener.stopListening, ) except error.CannotListenError as e: - check_bind_error(e, address, bind_addresses) + _base.check_bind_error(logger, e, address, bind_addresses) else: logger.warn("Unrecognized listener type: %s", listener["type"]) @@ -298,13 +289,6 @@ class SynapseHomeServer(HomeServer): return db_conn -def check_bind_error(e, address, bind_addresses): - if address == '0.0.0.0' and '::' in bind_addresses: - logger.warn('Failed to listen on 0.0.0.0, continuing because listening on [::]') - else: - raise e - - def setup(config_options): """ Args: From 26cd3f5690493b8969a5f097e9990a4760ef2455 Mon Sep 17 00:00:00 2001 From: Silke Date: Sun, 17 Dec 2017 13:04:05 +0100 
Subject: [PATCH 006/348] Remove logger argument and do not catch replication listener Signed-off-by: Silke --- synapse/app/_base.py | 13 +++++++------ synapse/app/homeserver.py | 29 +++++++++++------------------ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 0f9a57bd4c..e4318cdfc3 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -27,6 +27,8 @@ from synapse.util import PreserveLoggingContext from synapse.util.rlimit import change_resource_limit from twisted.internet import error, reactor +logger = logging.getLogger(__name__) + def start_worker_reactor(appname, config): """ Run the reactor in the main process @@ -122,7 +124,7 @@ def quit_with_error(error_string): sys.exit(1) -def listen_tcp(logger, bind_addresses, port, factory, backlog=50): +def listen_tcp(bind_addresses, port, factory, backlog=50): """ Create a TCP socket for a port and several addresses """ @@ -135,10 +137,10 @@ def listen_tcp(logger, bind_addresses, port, factory, backlog=50): address ) except error.CannotListenError as e: - check_bind_error(logger, e, address, bind_addresses) + check_bind_error(e, address, bind_addresses) -def listen_ssl(logger, bind_addresses, port, factory, context_factory, backlog=50): +def listen_ssl(bind_addresses, port, factory, context_factory, backlog=50): """ Create an SSL socket for a port and several addresses """ @@ -152,10 +154,10 @@ def listen_ssl(logger, bind_addresses, port, factory, context_factory, backlog=5 address ) except error.CannotListenError as e: - check_bind_error(logger, e, address, bind_addresses) + check_bind_error(e, address, bind_addresses) -def check_bind_error(logger, e, address, bind_addresses): +def check_bind_error(e, address, bind_addresses): """ This method checks an exception occurred while binding on 0.0.0.0. If :: is specified in the bind addresses a warning is shown. @@ -166,7 +168,6 @@ def check_bind_error(logger, e, address, bind_addresses): When binding on 0.0.0.0 after :: this can safely be ignored. Args: - logger (Logger): Logger used to log the warning. e (Exception): Exception that was caught. address (str): Address on which binding was attempted. bind_addresses (list): Addresses on which the service listens. 
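For context, a minimal standalone sketch (not part of this patch series) of the dual-stack behaviour that the check_bind_error docstring above describes: on Linux and macOS a listener on '::' already accepts IPv4 traffic, so a second bind on '0.0.0.0' for the same port fails with CannotListenError and can safely be skipped. The port number and factory below are placeholders chosen only for illustration.

from twisted.internet import error, reactor
from twisted.internet.protocol import ServerFactory

bind_addresses = ['::', '0.0.0.0']
for address in bind_addresses:
    try:
        # listenTCP(port, factory, backlog, interface)
        reactor.listenTCP(8008, ServerFactory(), 50, address)
    except error.CannotListenError as e:
        if address == '0.0.0.0' and '::' in bind_addresses:
            # duplicate IPv4 bind after the wildcard IPv6 bind; safe to ignore
            print('Failed to listen on 0.0.0.0, continuing: %s' % (e,))
        else:
            raise
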
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 66f206a89d..4ea2b39f20 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -25,7 +25,7 @@ from synapse.api.urls import CONTENT_REPO_PREFIX, FEDERATION_PREFIX, \ LEGACY_MEDIA_PREFIX, MEDIA_PREFIX, SERVER_KEY_PREFIX, SERVER_KEY_V2_PREFIX, \ STATIC_PREFIX, WEB_CLIENT_PREFIX from synapse.app import _base -from synapse.app._base import quit_with_error +from synapse.app._base import quit_with_error, listen_ssl, listen_tcp from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.crypto import context_factory @@ -58,7 +58,6 @@ from twisted.internet import defer, reactor from twisted.web.resource import EncodingResourceWrapper, Resource from twisted.web.server import GzipEncoderFactory from twisted.web.static import File -from twisted.internet import error logger = logging.getLogger("synapse.app.homeserver") @@ -131,8 +130,7 @@ class SynapseHomeServer(HomeServer): root_resource = create_resource_tree(resources, root_resource) if tls: - _base.listen_ssl( - logger, + listen_ssl( bind_addresses, port, SynapseSite( @@ -145,8 +143,7 @@ class SynapseHomeServer(HomeServer): ) else: - _base.listen_tcp( - logger, + listen_tcp( bind_addresses, port, SynapseSite( @@ -233,8 +230,7 @@ class SynapseHomeServer(HomeServer): elif listener["type"] == "manhole": bind_addresses = listener["bind_addresses"] - _base.listen_tcp( - logger, + listen_tcp( bind_addresses, listener["port"], manhole( @@ -246,16 +242,13 @@ class SynapseHomeServer(HomeServer): elif listener["type"] == "replication": bind_addresses = listener["bind_addresses"] for address in bind_addresses: - try: - factory = ReplicationStreamProtocolFactory(self) - server_listener = reactor.listenTCP( - listener["port"], factory, interface=address - ) - reactor.addSystemEventTrigger( - "before", "shutdown", server_listener.stopListening, - ) - except error.CannotListenError as e: - _base.check_bind_error(logger, e, address, bind_addresses) + factory = ReplicationStreamProtocolFactory(self) + server_listener = reactor.listenTCP( + listener["port"], factory, interface=address + ) + reactor.addSystemEventTrigger( + "before", "shutdown", server_listener.stopListening, + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) From df0f60279636b978aae456bbd639a5f3b2adf3ba Mon Sep 17 00:00:00 2001 From: Silke Date: Sun, 17 Dec 2017 14:15:30 +0100 Subject: [PATCH 007/348] Implement listen_tcp method in remaining workers Signed-off-by: Silke --- synapse/app/appservice.py | 38 ++++++++++++++----------------- synapse/app/client_reader.py | 39 +++++++++++++++----------------- synapse/app/federation_reader.py | 38 ++++++++++++++----------------- synapse/app/federation_sender.py | 38 ++++++++++++++----------------- synapse/app/frontend_proxy.py | 38 ++++++++++++++----------------- synapse/app/homeserver.py | 4 +--- synapse/app/media_repository.py | 38 ++++++++++++++----------------- synapse/app/pusher.py | 38 ++++++++++++++----------------- synapse/app/synchrotron.py | 38 ++++++++++++++----------------- synapse/app/user_dir.py | 38 ++++++++++++++----------------- 10 files changed, 155 insertions(+), 192 deletions(-) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index ba2657bbad..7d0c2879ae 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -79,17 +79,16 @@ class AppserviceServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in 
bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse appservice now listening on port %d", port) @@ -98,18 +97,15 @@ class AppserviceServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 129cfa901f..dc3f6efd43 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -103,17 +103,16 @@ class ClientReaderServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse client reader now listening on port %d", port) @@ -122,18 +121,16 @@ class ClientReaderServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) + else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 40cebe6f4a..a072291e1f 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -92,17 +92,16 @@ class FederationReaderServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse federation reader now listening on port %d", port) @@ -111,18 +110,15 @@ class FederationReaderServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + 
username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 389e3909d1..09e9488f06 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -106,17 +106,16 @@ class FederationSenderServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse federation_sender now listening on port %d", port) @@ -125,18 +124,15 @@ class FederationSenderServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index abc7ef5725..ae531c0aa4 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -157,17 +157,16 @@ class FrontendProxyServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse client reader now listening on port %d", port) @@ -176,18 +175,15 @@ class FrontendProxyServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 4ea2b39f20..92ab3b311b 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -228,10 +228,8 @@ class SynapseHomeServer(HomeServer): if listener["type"] == "http": self._listener_http(config, listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - listen_tcp( - bind_addresses, + listener["bind_addresses"], listener["port"], manhole( username="matrix", diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index c4e5f0965d..eab1597aaa 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -99,17 +99,16 @@ class 
MediaRepositoryServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse media repository now listening on port %d", port) @@ -118,18 +117,15 @@ class MediaRepositoryServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index db9a4d16f4..7fbbb0b0e1 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -114,17 +114,16 @@ class PusherServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse pusher now listening on port %d", port) @@ -133,18 +132,15 @@ class PusherServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 323fddee21..0abba3016e 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -288,17 +288,16 @@ class SynchrotronServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse synchrotron now listening on port %d", port) @@ -307,18 +306,15 @@ class SynchrotronServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + 
listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index be661a70c7..a48c4a2ae6 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -131,17 +131,16 @@ class UserDirectoryServer(HomeServer): root_resource = create_resource_tree(resources, Resource()) - for address in bind_addresses: - reactor.listenTCP( - port, - SynapseSite( - "synapse.access.http.%s" % (site_tag,), - site_tag, - listener_config, - root_resource, - ), - interface=address + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, ) + ) logger.info("Synapse user_dir now listening on port %d", port) @@ -150,18 +149,15 @@ class UserDirectoryServer(HomeServer): if listener["type"] == "http": self._listen_http(listener) elif listener["type"] == "manhole": - bind_addresses = listener["bind_addresses"] - - for address in bind_addresses: - reactor.listenTCP( - listener["port"], - manhole( - username="matrix", - password="rabbithole", - globals={"hs": self}, - ), - interface=address + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, ) + ) else: logger.warn("Unrecognized listener type: %s", listener["type"]) From 75c1b8df01273aafbbe25afb47582021dbb82498 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 20 Dec 2017 19:31:00 +0000 Subject: [PATCH 008/348] Better logging when login can't find a 3pid --- synapse/rest/client/v1/login.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 5669ecb724..45844aa2d2 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -191,19 +191,25 @@ class LoginRestServlet(ClientV1RestServlet): # convert threepid identifiers to user IDs if identifier["type"] == "m.id.thirdparty": - if 'medium' not in identifier or 'address' not in identifier: + address = identifier.get('address') + medium = identifier.get('medium') + + if medium is None or address is None: raise SynapseError(400, "Invalid thirdparty identifier") - address = identifier['address'] - if identifier['medium'] == 'email': + if medium == 'email': # For emails, transform the address to lowercase. # We store all email addreses as lowercase in the DB. # (See add_threepid in synapse/handlers/auth.py) address = address.lower() user_id = yield self.hs.get_datastore().get_user_id_by_threepid( - identifier['medium'], address + medium, address, ) if not user_id: + logger.warn( + "unknown 3pid identifier medium %s, address %r", + medium, address, + ) raise LoginError(403, "", errcode=Codes.FORBIDDEN) identifier = { From a7b726ad181935da16ede550e1202b77632823f3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 009/348] federation_client: clean up imports --- synapse/federation/federation_client.py | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b8f02f5391..0f754f933f 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -14,28 +14,25 @@ # limitations under the License. 
-from twisted.internet import defer - -from .federation_base import FederationBase -from synapse.api.constants import Membership - -from synapse.api.errors import ( - CodeMessageException, HttpResponseException, SynapseError, -) -from synapse.util import unwrapFirstError, logcontext -from synapse.util.caches.expiringcache import ExpiringCache -from synapse.util.logutils import log_function -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn -from synapse.events import FrozenEvent, builder -import synapse.metrics - -from synapse.util.retryutils import NotRetryingDestination - import copy import itertools import logging import random +from twisted.internet import defer + +from synapse.api.constants import Membership +from synapse.api.errors import ( + CodeMessageException, HttpResponseException, SynapseError, +) +from synapse.events import FrozenEvent, builder +from synapse.federation.federation_base import FederationBase +import synapse.metrics +from synapse.util import logcontext, unwrapFirstError +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logutils import log_function +from synapse.util.retryutils import NotRetryingDestination logger = logging.getLogger(__name__) From 65abc90fb6f10a91d72bc4518a8d155251d23a6f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 010/348] federation_server: clean up imports --- synapse/federation/federation_server.py | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index a2327f24b6..5fdfbbeee5 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -12,25 +12,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer - -from .federation_base import FederationBase -from .units import Transaction, Edu - -from synapse.util import async -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn -from synapse.util.logutils import log_function -from synapse.util.caches.response_cache import ResponseCache -from synapse.events import FrozenEvent -from synapse.types import get_domain_from_id -import synapse.metrics - -from synapse.api.errors import AuthError, FederationError, SynapseError - -from synapse.crypto.event_signing import compute_event_signature +import logging import simplejson as json -import logging +from twisted.internet import defer + +from synapse.api.errors import AuthError, FederationError, SynapseError +from synapse.crypto.event_signing import compute_event_signature +from synapse.events import FrozenEvent +from synapse.federation.federation_base import FederationBase +from synapse.federation.units import Edu, Transaction +import synapse.metrics +from synapse.types import get_domain_from_id +from synapse.util import async +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logutils import log_function # when processing incoming transactions, we try to handle multiple rooms in # parallel, up to this limit. 
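The trailing context of the hunk above notes that incoming transactions are processed for multiple rooms in parallel, up to a limit. As a hedged illustration only (not code from this patch series, and not necessarily the mechanism synapse itself uses), bounded parallelism of that kind can be expressed with Twisted's DeferredSemaphore; the limit value and function names below are assumptions.

from twisted.internet import defer

TRANSACTION_CONCURRENCY_LIMIT = 10  # assumed value, for the sketch only


def process_rooms_concurrently(process_pdus_for_room, room_ids):
    # Call process_pdus_for_room(room_id) for each room, running at most
    # TRANSACTION_CONCURRENCY_LIMIT of them at a time.
    sem = defer.DeferredSemaphore(TRANSACTION_CONCURRENCY_LIMIT)
    deferreds = [
        sem.run(process_pdus_for_room, room_id) for room_id in room_ids
    ]
    return defer.gatherResults(deferreds, consumeErrors=True)
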
From 3079f80d4a67714294660e27214cd2a55bb7ecf1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 011/348] Factor out `event_from_pdu_json` turns out we have two copies of this, and neither needs to be an instance method --- synapse/federation/federation_base.py | 20 ++++++++++++++ synapse/federation/federation_client.py | 36 +++++++++++-------------- synapse/federation/federation_server.py | 25 +++++++---------- 3 files changed, 44 insertions(+), 37 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index a0f5d40eb3..6476cea895 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -16,6 +16,7 @@ import logging from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash +from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer @@ -169,3 +170,22 @@ class FederationBase(object): ) return deferreds + + +def event_from_pdu_json(pdu_json, outlier=False): + """Construct a FrozenEvent from an event json received over federation + + Args: + pdu_json (object): pdu as received over federation + outlier (bool): True to mark this event as an outlier + + Returns: + FrozenEvent + """ + event = FrozenEvent( + pdu_json + ) + + event.internal_metadata.outlier = outlier + + return event diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 0f754f933f..b1fe03f702 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -25,8 +25,11 @@ from synapse.api.constants import Membership from synapse.api.errors import ( CodeMessageException, HttpResponseException, SynapseError, ) -from synapse.events import FrozenEvent, builder -from synapse.federation.federation_base import FederationBase +from synapse.events import builder +from synapse.federation.federation_base import ( + FederationBase, + event_from_pdu_json, +) import synapse.metrics from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache @@ -181,7 +184,7 @@ class FederationClient(FederationBase): logger.debug("backfill transaction_data=%s", repr(transaction_data)) pdus = [ - self.event_from_pdu_json(p, outlier=False) + event_from_pdu_json(p, outlier=False) for p in transaction_data["pdus"] ] @@ -241,7 +244,7 @@ class FederationClient(FederationBase): logger.debug("transaction_data %r", transaction_data) pdu_list = [ - self.event_from_pdu_json(p, outlier=outlier) + event_from_pdu_json(p, outlier=outlier) for p in transaction_data["pdus"] ] @@ -333,11 +336,11 @@ class FederationClient(FederationBase): ) pdus = [ - self.event_from_pdu_json(p, outlier=True) for p in result["pdus"] + event_from_pdu_json(p, outlier=True) for p in result["pdus"] ] auth_chain = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in result.get("auth_chain", []) ] @@ -438,7 +441,7 @@ class FederationClient(FederationBase): ) auth_chain = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in res["auth_chain"] ] @@ -567,12 +570,12 @@ class FederationClient(FederationBase): logger.debug("Got content: %s", content) state = [ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("state", []) ] auth_chain = 
[ - self.event_from_pdu_json(p, outlier=True) + event_from_pdu_json(p, outlier=True) for p in content.get("auth_chain", []) ] @@ -647,7 +650,7 @@ class FederationClient(FederationBase): logger.debug("Got response to send_invite: %s", pdu_dict) - pdu = self.event_from_pdu_json(pdu_dict) + pdu = event_from_pdu_json(pdu_dict) # Check signatures are correct. pdu = yield self._check_sigs_and_hash(pdu) @@ -737,7 +740,7 @@ class FederationClient(FederationBase): ) auth_chain = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in content["auth_chain"] ] @@ -785,7 +788,7 @@ class FederationClient(FederationBase): ) events = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in content.get("events", []) ] @@ -802,15 +805,6 @@ class FederationClient(FederationBase): defer.returnValue(signed_events) - def event_from_pdu_json(self, pdu_json, outlier=False): - event = FrozenEvent( - pdu_json - ) - - event.internal_metadata.outlier = outlier - - return event - @defer.inlineCallbacks def forward_third_party_invite(self, destinations, room_id, event_dict): for destination in destinations: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5fdfbbeee5..9849953c9b 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -19,8 +19,10 @@ from twisted.internet import defer from synapse.api.errors import AuthError, FederationError, SynapseError from synapse.crypto.event_signing import compute_event_signature -from synapse.events import FrozenEvent -from synapse.federation.federation_base import FederationBase +from synapse.federation.federation_base import ( + FederationBase, + event_from_pdu_json, +) from synapse.federation.units import Edu, Transaction import synapse.metrics from synapse.types import get_domain_from_id @@ -169,7 +171,7 @@ class FederationServer(FederationBase): p["age_ts"] = request_time - int(p["age"]) del p["age"] - event = self.event_from_pdu_json(p) + event = event_from_pdu_json(p) room_id = event.room_id pdus_by_room.setdefault(room_id, []).append(event) @@ -343,7 +345,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_invite_request(self, origin, content): - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) ret_pdu = yield self.handler.on_invite_request(origin, pdu) time_now = self._clock.time_msec() defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) @@ -351,7 +353,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_send_join_request(self, origin, content): logger.debug("on_send_join_request: content: %s", content) - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures) res_pdus = yield self.handler.on_send_join_request(origin, pdu) time_now = self._clock.time_msec() @@ -371,7 +373,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_send_leave_request(self, origin, content): logger.debug("on_send_leave_request: content: %s", content) - pdu = self.event_from_pdu_json(content) + pdu = event_from_pdu_json(content) logger.debug("on_send_leave_request: pdu sigs: %s", pdu.signatures) yield self.handler.on_send_leave_request(origin, pdu) defer.returnValue((200, {})) @@ -408,7 +410,7 @@ class FederationServer(FederationBase): """ with (yield self._server_linearizer.queue((origin, room_id))): auth_chain = [ - self.event_from_pdu_json(e) + event_from_pdu_json(e) for e in 
content["auth_chain"] ] @@ -583,15 +585,6 @@ class FederationServer(FederationBase): def __str__(self): return "" % self.server_name - def event_from_pdu_json(self, pdu_json, outlier=False): - event = FrozenEvent( - pdu_json - ) - - event.internal_metadata.outlier = outlier - - return event - @defer.inlineCallbacks def exchange_third_party_invite( self, From bd91857028e0b7adf046a379a0eee030a92c1249 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 30 Dec 2017 18:40:19 +0000 Subject: [PATCH 012/348] Check missing fields in event_from_pdu_json Return a 400 rather than a 500 when somebody messes up their send_join --- synapse/federation/federation_base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 6476cea895..7918d3e442 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -18,6 +18,7 @@ from synapse.api.errors import SynapseError from synapse.crypto.event_signing import check_event_content_hash from synapse.events import FrozenEvent from synapse.events.utils import prune_event +from synapse.http.servlet import assert_params_in_request from synapse.util import unwrapFirstError, logcontext from twisted.internet import defer @@ -181,7 +182,13 @@ def event_from_pdu_json(pdu_json, outlier=False): Returns: FrozenEvent + + Raises: + SynapseError: if the pdu is missing required fields """ + # we could probably enforce a bunch of other fields here (room_id, sender, + # origin, etc etc) + assert_params_in_request(pdu_json, ('event_id', 'type')) event = FrozenEvent( pdu_json ) From 840f72356e93971dedd7c66117c293866060c54f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 5 Jan 2018 12:30:28 +0000 Subject: [PATCH 013/348] Remove 'verbosity'/'log_file' from generated cfg ... because these only really exist to confuse people nowadays. Also bring log config more into line with the generated log config, by making `level_for_storage` apply to the `synapse.storage.SQL` logger rather than `synapse.storage`. --- synapse/config/logger.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index a1d6e4d4f7..9e8003ad65 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -79,12 +79,6 @@ class LoggingConfig(Config): os.path.join(config_dir_path, server_name + ".log.config") ) return """ - # Logging verbosity level. Ignored if log_config is specified. - verbose: 0 - - # File to write logging to. Ignored if log_config is specified. - log_file: "%(log_file)s" - # A yaml python logging config file log_config: "%(log_config)s" """ % locals() @@ -150,6 +144,9 @@ def setup_logging(config, use_worker_options=False): ) if log_config is None: + # We don't have a logfile, so fall back to the 'verbosity' param from + # the config or cmdline. 
(Note that we generate a log config for new + # installs, so this will be an unusual case) level = logging.INFO level_for_storage = logging.INFO if config.verbosity: @@ -157,11 +154,10 @@ def setup_logging(config, use_worker_options=False): if config.verbosity > 1: level_for_storage = logging.DEBUG - # FIXME: we need a logging.WARN for a -q quiet option logger = logging.getLogger('') logger.setLevel(level) - logging.getLogger('synapse.storage').setLevel(level_for_storage) + logging.getLogger('synapse.storage.SQL').setLevel(level_for_storage) formatter = logging.Formatter(log_format) if log_file: From eccfc8e928020e7b009979a629278718d49f8e24 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sun, 7 Jan 2018 19:52:58 -0700 Subject: [PATCH 014/348] Fix templating error with unban permission message Fixes https://github.com/matrix-org/synapse/issues/2759 Signed-off-by: Travis Ralston --- synapse/event_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 061ee86b16..cd5627e36a 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -319,7 +319,7 @@ def _is_membership_change_allowed(event, auth_events): # TODO (erikj): Implement kicks. if target_banned and user_level < ban_level: raise AuthError( - 403, "You cannot unban user &s." % (target_user_id,) + 403, "You cannot unban user %s." % (target_user_id,) ) elif target_user_id != event.user_id: kick_level = _get_named_level(auth_events, "kick", 50) From 3a556f1ea024d93ba2c1a93a88137fe82f0b3298 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 11:27:19 +0000 Subject: [PATCH 015/348] Make indentation of generated log config consistent (we had a mix of 2- and 4-space indents) --- synapse/config/logger.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 9e8003ad65..9ee46c4dea 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -28,27 +28,27 @@ DEFAULT_LOG_CONFIG = Template(""" version: 1 formatters: - precise: - format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s\ -- %(message)s' + precise: + format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - \ +%(request)s - %(message)s' filters: - context: - (): synapse.util.logcontext.LoggingContextFilter - request: "" + context: + (): synapse.util.logcontext.LoggingContextFilter + request: "" handlers: - file: - class: logging.handlers.RotatingFileHandler - formatter: precise - filename: ${log_file} - maxBytes: 104857600 - backupCount: 10 - filters: [context] - console: - class: logging.StreamHandler - formatter: precise - filters: [context] + file: + class: logging.handlers.RotatingFileHandler + formatter: precise + filename: ${log_file} + maxBytes: 104857600 + backupCount: 10 + filters: [context] + console: + class: logging.StreamHandler + formatter: precise + filters: [context] loggers: synapse: From 0211464ba2eb188ee519030ada2ed6b517b41513 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 11:28:33 +0000 Subject: [PATCH 016/348] Fix broken config UTs https://github.com/matrix-org/synapse/pull/2755 broke log-config generation, which in turn broke the unit tests. 
--- synapse/config/logger.py | 4 ++-- tests/config/test_generate.py | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 9e8003ad65..0922125187 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -74,7 +74,6 @@ class LoggingConfig(Config): self.log_file = self.abspath(config.get("log_file")) def default_config(self, config_dir_path, server_name, **kwargs): - log_file = self.abspath("homeserver.log") log_config = self.abspath( os.path.join(config_dir_path, server_name + ".log.config") ) @@ -117,9 +116,10 @@ class LoggingConfig(Config): def generate_files(self, config): log_config = config.get("log_config") if log_config and not os.path.exists(log_config): + log_file = self.abspath("homeserver.log") with open(log_config, "wb") as log_config_file: log_config_file.write( - DEFAULT_LOG_CONFIG.substitute(log_file=config["log_file"]) + DEFAULT_LOG_CONFIG.substitute(log_file=log_file) ) diff --git a/tests/config/test_generate.py b/tests/config/test_generate.py index 8f57fbeb23..879159ccea 100644 --- a/tests/config/test_generate.py +++ b/tests/config/test_generate.py @@ -12,9 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os.path +import re import shutil import tempfile + from synapse.config.homeserver import HomeServerConfig from tests import unittest @@ -23,7 +26,6 @@ class ConfigGenerationTestCase(unittest.TestCase): def setUp(self): self.dir = tempfile.mkdtemp() - print self.dir self.file = os.path.join(self.dir, "homeserver.yaml") def tearDown(self): @@ -48,3 +50,16 @@ class ConfigGenerationTestCase(unittest.TestCase): ]), set(os.listdir(self.dir)) ) + + self.assert_log_filename_is( + os.path.join(self.dir, "lemurs.win.log.config"), + os.path.join(os.getcwd(), "homeserver.log"), + ) + + def assert_log_filename_is(self, log_config_file, expected): + with open(log_config_file) as f: + config = f.read() + # find the 'filename' line + matches = re.findall("^\s*filename:\s*(.*)$", config, re.M) + self.assertEqual(1, len(matches)) + self.assertEqual(matches[0], expected) From 142fb0a7d40f17d834e1e4d13963839195c1040b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:03:12 +0000 Subject: [PATCH 017/348] Disable user_directory updates for UTs Fix flakiness in the UTs caused by the user_directory being updated in the background --- tests/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index ed8a7360f5..44e5f75093 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -58,6 +58,10 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.email_enable_notifs = False config.block_non_admin_invites = False + # disable user directory updates, because they get done in the + # background, which upsets the test runner. 
+ config.update_user_directory = False + config.use_frozen_dicts = True config.database_config = {"name": "sqlite3"} config.ldap_enabled = False From 374c4d4cedf2366f145934d750fd309209b0de62 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:04:52 +0000 Subject: [PATCH 018/348] Remove dead code pointless function is pointless --- tests/rest/client/v1/test_rooms.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index de376fb514..9f37255381 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -515,9 +515,6 @@ class RoomsCreateTestCase(RestTestCase): synapse.rest.client.v1.room.register_servlets(hs, self.mock_resource) - def tearDown(self): - pass - @defer.inlineCallbacks def test_post_room_no_keys(self): # POST with no config keys, expect new room id From f8fa5ae4af770cdf69e2d335f3aff0b823b34009 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 12:05:25 +0000 Subject: [PATCH 019/348] enable twisted delayedcall debugging in UTs --- tests/unittest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 38715972dd..7b478c4294 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import twisted from twisted.trial import unittest import logging @@ -65,6 +65,10 @@ class TestCase(unittest.TestCase): @around(self) def setUp(orig): + # enable debugging of delayed calls - this means that we get a + # traceback when a unit test exits leaving things on the reactor. 
+ twisted.internet.base.DelayedCall.debug = True + old_level = logging.getLogger().level if old_level != level: From b30cd5b107acafce43fb63c471c086b8df4d981a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:31:00 +0000 Subject: [PATCH 020/348] Remove dead code related to default thumbnails --- synapse/rest/media/v1/thumbnail_resource.py | 76 +-------------------- synapse/storage/media_repository.py | 3 - 2 files changed, 3 insertions(+), 76 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 68d56b2b10..779fd3e9bc 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,11 +85,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) if thumbnail_infos: @@ -114,9 +109,7 @@ class ThumbnailResource(Resource): yield respond_with_file(request, t_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_local_thumbnail(self, request, media_id, desired_width, @@ -128,11 +121,6 @@ class ThumbnailResource(Resource): respond_404(request) return - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.local_media_filepath(media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_local_media_thumbnails(media_id) for info in thumbnail_infos: t_w = info["thumbnail_width"] == desired_width @@ -166,10 +154,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, @@ -177,11 +162,6 @@ class ThumbnailResource(Resource): desired_method, desired_type): media_info = yield self.media_repo.get_remote_media(server_name, media_id) - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -213,23 +193,13 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - yield self._respond_default_thumbnail( - request, media_info, desired_width, desired_height, - desired_method, desired_type, - ) + respond_404(request) @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead. 
- media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - # if media_info["media_type"] == "image/svg+xml": - # file_path = self.filepaths.remote_media_filepath(server_name, media_id) - # yield respond_with_file(request, media_info["media_type"], file_path) - # return - thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) @@ -250,47 +220,7 @@ class ThumbnailResource(Resource): ) yield respond_with_file(request, t_type, file_path, t_length) else: - yield self._respond_default_thumbnail( - request, media_info, width, height, method, m_type, - ) - - @defer.inlineCallbacks - def _respond_default_thumbnail(self, request, media_info, width, height, - method, m_type): - # XXX: how is this meant to work? store.get_default_thumbnails - # appears to always return [] so won't this always 404? - media_type = media_info["media_type"] - top_level_type = media_type.split("/")[0] - sub_type = media_type.split("/")[-1].split(";")[0] - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, sub_type, - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - top_level_type, "_default", - ) - if not thumbnail_infos: - thumbnail_infos = yield self.store.get_default_thumbnails( - "_default", "_default", - ) - if not thumbnail_infos: respond_404(request) - return - - thumbnail_info = self._select_thumbnail( - width, height, "crop", m_type, thumbnail_infos - ) - - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - t_length = thumbnail_info["thumbnail_length"] - - file_path = self.filepaths.default_thumbnail( - top_level_type, sub_type, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) def _select_thumbnail(self, desired_width, desired_height, desired_method, desired_type, thumbnail_infos): diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index a66ff7c1e0..6ebc372498 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -29,9 +29,6 @@ class MediaRepositoryStore(BackgroundUpdateStore): where_clause='url_cache IS NOT NULL', ) - def get_default_thumbnails(self, top_level_type, sub_type): - return [] - def get_local_media(self, media_id): """Get the metadata for a local piece of media Returns: From 51c9d9ed65af58d4fb5657bad877dbf1f0dcaf39 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 14:39:12 +0000 Subject: [PATCH 021/348] Add /room/{id}/event/{id} to synapse Turns out that there is a valid usecase for retrieving event by id (notably having received a push), but event ids should be scoped to room, so /event/{id} is wrong. 
--- synapse/rest/client/v1/room.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 75b735b47d..682a0af9fc 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -487,13 +487,35 @@ class RoomInitialSyncRestServlet(ClientV1RestServlet): defer.returnValue((200, content)) -class RoomEventContext(ClientV1RestServlet): +class RoomEventServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/rooms/(?P[^/]*)/event/(?P[^/]*)$" + ) + + def __init__(self, hs): + super(RoomEventServlet, self).__init__(hs) + self.clock = hs.get_clock() + self.event_handler = hs.get_event_handler() + + @defer.inlineCallbacks + def on_GET(self, request, room_id, event_id): + requester = yield self.auth.get_user_by_req(request) + event = yield self.event_handler.get_event(requester.user, event_id) + + time_now = self.clock.time_msec() + if event: + defer.returnValue((200, serialize_event(event, time_now))) + else: + defer.returnValue((404, "Event not found.")) + + +class RoomEventContextServlet(ClientV1RestServlet): PATTERNS = client_path_patterns( "/rooms/(?P[^/]*)/context/(?P[^/]*)$" ) def __init__(self, hs): - super(RoomEventContext, self).__init__(hs) + super(RoomEventContextServlet, self).__init__(hs) self.clock = hs.get_clock() self.handlers = hs.get_handlers() @@ -803,4 +825,5 @@ def register_servlets(hs, http_server): RoomTypingRestServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) JoinedRoomsRestServlet(hs).register(http_server) - RoomEventContext(hs).register(http_server) + RoomEventServlet(hs).register(http_server) + RoomEventContextServlet(hs).register(http_server) From b6c9deffda94f230085d9974379643497749e7f5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 15:53:23 +0000 Subject: [PATCH 022/348] Remove dead TODO --- synapse/rest/media/v1/thumbnail_resource.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 779fd3e9bc..70dbf7f5c9 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,8 +198,6 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): - # TODO: Don't download the whole remote file - # We should proxy the thumbnail from the remote server instead. 
thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) From 47ca5eb8822ddc376b098a2747e1dbb85b2ce32b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Jan 2018 15:09:43 +0000 Subject: [PATCH 023/348] Split out add_file_headers --- synapse/rest/media/v1/_base.py | 70 ++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 95fa95fce3..57a4509816 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -70,38 +70,11 @@ def respond_with_file(request, media_type, file_path, logger.debug("Responding with %r", file_path) if os.path.isfile(file_path): - request.setHeader(b"Content-Type", media_type.encode("UTF-8")) - if upload_name: - if is_ascii(upload_name): - request.setHeader( - b"Content-Disposition", - b"inline; filename=%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - else: - request.setHeader( - b"Content-Disposition", - b"inline; filename*=utf-8''%s" % ( - urllib.quote(upload_name.encode("utf-8")), - ), - ) - - # cache for at least a day. - # XXX: we might want to turn this off for data we don't want to - # recommend caching as it's sensitive or private - or at least - # select private. don't bother setting Expires as all our - # clients are smart enough to be happy with Cache-Control - request.setHeader( - b"Cache-Control", b"public,max-age=86400,s-maxage=86400" - ) if file_size is None: stat = os.stat(file_path) file_size = stat.st_size - request.setHeader( - b"Content-Length", b"%d" % (file_size,) - ) + add_file_headers(request, media_type, file_size, upload_name) with open(file_path, "rb") as f: yield logcontext.make_deferred_yieldable( @@ -111,3 +84,44 @@ def respond_with_file(request, media_type, file_path, finish_request(request) else: respond_404(request) + + +def add_file_headers(request, media_type, file_size, upload_name): + """Adds the correct response headers in preparation for responding with the + media. + + Args: + request (twisted.web.http.Request) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + request.setHeader(b"Content-Type", media_type.encode("UTF-8")) + if upload_name: + if is_ascii(upload_name): + request.setHeader( + b"Content-Disposition", + b"inline; filename=%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + else: + request.setHeader( + b"Content-Disposition", + b"inline; filename*=utf-8''%s" % ( + urllib.quote(upload_name.encode("utf-8")), + ), + ) + + # cache for at least a day. + # XXX: we might want to turn this off for data we don't want to + # recommend caching as it's sensitive or private - or at least + # select private. 
don't bother setting Expires as all our + # clients are smart enough to be happy with Cache-Control + request.setHeader( + b"Cache-Control", b"public,max-age=86400,s-maxage=86400" + ) + + request.setHeader( + b"Content-Length", b"%d" % (file_size,) + ) From 1ee787912b1fda1f9255627b0ea5f69cf021679d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 16:58:09 +0000 Subject: [PATCH 024/348] Add some helper classes --- synapse/rest/media/v1/_base.py | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 57a4509816..1310820486 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -125,3 +125,76 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader( b"Content-Length", b"%d" % (file_size,) ) + + +@defer.inlineCallbacks +def respond_with_responder(request, responder, media_type, file_size, upload_name=None): + """Responds to the request with given responder. If responder is None then + returns 404. + + Args: + request (twisted.web.http.Request) + responder (Responder) + media_type (str): The media/content type. + file_size (int): Size in bytes of the media, if known. + upload_name (str): The name of the requested file, if any. + """ + if not responder: + respond_404(request) + return + + add_file_headers(request, media_type, file_size, upload_name) + yield responder.write_to_consumer(request) + finish_request(request) + + +class Responder(object): + """Represents a response that can be streamed to the requester. + + Either `write_to_consumer` or `cancel` must be called to clean up any open + resources. + """ + def write_to_consumer(self, consumer): + """Stream response into consumer + + Args: + consumer (IConsumer) + + Returns: + Deferred: Resolves once the response has finished being written + """ + pass + + def cancel(self): + """Called when the responder is not going to be used after all. + """ + pass + + +class FileInfo(object): + """Details about a requested/uploaded file. + + Attributes: + server_name (str): The server name where the media originated from, + or None if local. + file_id (str): The local ID of the file. For local files this is the + same as the media_id + media_type (str): Type of the file + url_cache (bool): If the file is for the url preview cache + thumbnail (bool): Whether the file is a thumbnail or not. 
+ thumbnail_width (int) + thumbnail_height (int) + thumbnail_method (int) + thumbnail_type (str) + """ + def __init__(self, server_name, file_id, url_cache=False, + thumbnail=False, thumbnail_width=None, thumbnail_height=None, + thumbnail_method=None, thumbnail_type=None): + self.server_name = server_name + self.file_id = file_id + self.url_cache = url_cache + self.thumbnail = thumbnail + self.thumbnail_width = thumbnail_width + self.thumbnail_height = thumbnail_height + self.thumbnail_method = thumbnail_method + self.thumbnail_type = thumbnail_type From ada470bccbf08486ae9e48afbcc4ebfd8161e93b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:07:30 +0000 Subject: [PATCH 025/348] Add MediaStorage class --- synapse/rest/media/v1/media_storage.py | 198 +++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 synapse/rest/media/v1/media_storage.py diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py new file mode 100644 index 0000000000..052745e6f6 --- /dev/null +++ b/synapse/rest/media/v1/media_storage.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads +from twisted.protocols.basic import FileSender + +from ._base import Responder + +from synapse.util.logcontext import make_deferred_yieldable + +import contextlib +import os +import logging +import shutil + +logger = logging.getLogger(__name__) + + +class MediaStorage(object): + """Responsible for storing/fetching files from local sources. + """ + + def __init__(self, local_media_directory, filepaths): + self.local_media_directory = local_media_directory + self.filepaths = filepaths + + @defer.inlineCallbacks + def store_file(self, source, file_info): + """Write `source` to the on disk media store, and also any other + configured storage providers + + Args: + source: A file like object that should be written + file_info (FileInfo): Info about the file to store + + Returns: + Deferred[str]: the file path written to in the primary media store + """ + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, fname, + )) + + defer.returnValue(fname) + + @contextlib.contextmanager + def store_into_file(self, file_info): + """Context manager used to get a file like object to write into, as + described by file_info. + + Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a + function that returns a Deferred that must be waited on after the file + has been successfully written to. + + Args: + file_info (FileInfo): Info about the file to store + + Example: + + with media_storage.store_into_file(info) as (f, fname, finish_cb): + # .. write into f ... 
+ yield finish_cb() + """ + + path = self._file_info_to_path(file_info) + fname = os.path.join(self.local_media_directory, path) + + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + finished_called = [False] + + def finish(): + # This will be used later when we want to hit out to other storage + # places + finished_called[0] = True + return defer.succeed(None) + + try: + with open(fname, "wb") as f: + yield f, fname, finish + except Exception as e: + try: + os.remove(fname) + except Exception: + pass + raise e + + if not finished_called: + raise Exception("Fnished callback not called") + + @defer.inlineCallbacks + def fetch_media(self, file_info): + """Attempts to fetch media described by file_info from the local cache + and configured storage providers. + + Args: + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the file was found, + otherwise None. + """ + + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(FileResponder(open(local_path, "rb"))) + + defer.returnValue(None) + + def _file_info_to_path(self, file_info): + """Converts file_info into a relative path. + """ + if file_info.url_cache: + return self.filepaths.url_cache_filepath_rel(file_info.file_id) + + if file_info.server_name: + if file_info.thumbnail: + return self.filepaths.remote_media_thumbnail_rel( + server_name=file_info.server_name, + file_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.remote_media_filepath_rel( + file_info.server_name, file_info.file_id, + ) + + if file_info.thumbnail: + return self.filepaths.local_media_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method + ) + return self.filepaths.local_media_filepath_rel( + file_info.file_id, + ) + + +def _write_file_synchronously(source, fname): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + """ + dirname = os.path.dirname(fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) + + +class FileResponder(Responder): + """Wraps an open file that can be sent to a request. + + Args: + open_file (file): A file like object to be streamed ot the client, + is closed when finished streaming. 
+ """ + def __init__(self, open_file): + self.open_file = open_file + + @defer.inlineCallbacks + def write_to_consumer(self, consumer): + with self.open_file: + yield FileSender().beginFileTransfer(self.open_file, consumer) + + def cancel(self): + self.open_file.close() From dd3092c3a357be2453e25293b5590d627f3cfc48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:45:11 +0000 Subject: [PATCH 026/348] Use MediaStorage for local files --- synapse/rest/media/v1/download_resource.py | 24 +-- synapse/rest/media/v1/media_repository.py | 168 +++++++++------------ 2 files changed, 73 insertions(+), 119 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 6879249c8a..2a5afa9ff4 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -57,34 +57,12 @@ class DownloadResource(Resource): ) server_name, media_id, name = parse_media_id(request) if server_name == self.server_name: - yield self._respond_local_file(request, media_id, name) + yield self.media_repo.get_local_media(request, media_id, name) else: yield self._respond_remote_file( request, server_name, media_id, name ) - @defer.inlineCallbacks - def _respond_local_file(self, request, media_id, name): - media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - respond_404(request) - return - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - upload_name = name if name else media_info["upload_name"] - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_filepath(media_id) - else: - file_path = self.filepaths.local_media_filepath(media_id) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) - @defer.inlineCallbacks def _respond_remote_file(self, request, server_name, media_id, name): # don't forward requests for remote media if allow_remote is false diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index eed9056a2f..6ad9320b69 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vecotr Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,6 +19,7 @@ import twisted.internet.error import twisted.web.http from twisted.web.resource import Resource +from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -25,6 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string @@ -33,7 +36,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination import os @@ -74,6 +77,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() + self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + self.clock.looping_call( self._update_recently_accessed_remotes, UPDATE_RECENTLY_ACCESSED_REMOTES_TS @@ -88,72 +93,6 @@ class MediaRepository(object): media, self.clock.time_msec() ) - @staticmethod - def _makedirs(filepath): - dirname = os.path.dirname(filepath) - if not os.path.exists(dirname): - os.makedirs(dirname) - - @staticmethod - def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called - from a thread. - - Args: - source: A file like object to be written - fname (str): Path to write to - """ - MediaRepository._makedirs(fname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) - - @defer.inlineCallbacks - def write_to_file_and_backup(self, source, path): - """Write `source` to the on disk media store, and also the backup store - if configured. - - Args: - source: A file like object that should be written - path (str): Relative path to write file to - - Returns: - Deferred[str]: the file path written to in the primary media store - """ - fname = os.path.join(self.primary_base_path, path) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, fname, - )) - - # Write to backup repository - yield self.copy_to_backup(path) - - defer.returnValue(fname) - - @defer.inlineCallbacks - def copy_to_backup(self, path): - """Copy a file from the primary to backup media store, if configured. 
- - Args: - path(str): Relative path to write file to - """ - if self.backup_base_path: - primary_fname = os.path.join(self.primary_base_path, path) - backup_fname = os.path.join(self.backup_base_path, path) - - # We can either wait for successful writing to the backup repository - # or write in the background and immediately return - if self.synchronous_backup_media_store: - yield make_deferred_yieldable(threads.deferToThread( - shutil.copyfile, primary_fname, backup_fname, - )) - else: - preserve_fn(threads.deferToThread)( - shutil.copyfile, primary_fname, backup_fname, - ) - @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -171,10 +110,13 @@ class MediaRepository(object): """ media_id = random_string(24) - fname = yield self.write_to_file_and_backup( - content, self.filepaths.local_media_filepath_rel(media_id) + file_info = FileInfo( + server_name=None, + file_id=media_id, ) + fname = yield self.media_storage.store_file(content, file_info) + logger.info("Stored local media in file %r", fname) yield self.store.store_local_media( @@ -194,6 +136,30 @@ class MediaRepository(object): defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) + @defer.inlineCallbacks + def get_local_media(self, request, media_id, name): + """Responds to reqests for local media, if exists, or returns 404. + """ + media_info = yield self.store.get_local_media(media_id) + if not media_info or media_info["quarantined_by"]: + respond_404(request) + return + + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + url_cache = media_info["url_cache"] + + file_info = FileInfo( + None, media_id, + url_cache=url_cache, + ) + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + @defer.inlineCallbacks def get_remote_media(self, server_name, media_id): key = (server_name, media_id) @@ -368,11 +334,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=None, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -400,11 +373,18 @@ class MediaRepository(object): if t_byte_source: try: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() @@ -472,20 +452,6 @@ class MediaRepository(object): # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Work out the correct file name for thumbnail - if server_name: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - elif 
url_cache: - file_path = self.filepaths.url_cache_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - else: - file_path = self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method - ) - # Generate the thumbnail if t_method == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -505,9 +471,19 @@ class MediaRepository(object): continue try: - # Write to disk - output_path = yield self.write_to_file_and_backup( - t_byte_source, file_path, + file_info = FileInfo( + server_name=server_name, + file_id=media_id, + thumbnail=True, + thumbnail_width=t_width, + thumbnail_height=t_height, + thumbnail_method=t_method, + thumbnail_type=t_type, + url_cache=url_cache, + ) + + output_path = yield self.media_storage.store_file( + t_byte_source, file_info, ) finally: t_byte_source.close() From 9e20840e0296eb9b814bb0e7130342c5c9a19e3d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:52:06 +0000 Subject: [PATCH 027/348] Use MediaStorage for remote media --- synapse/rest/media/v1/download_resource.py | 43 +--- synapse/rest/media/v1/media_repository.py | 245 ++++++++++++--------- 2 files changed, 154 insertions(+), 134 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 2a5afa9ff4..5dc92972cf 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -14,7 +14,7 @@ # limitations under the License. import synapse.http.servlet -from ._base import parse_media_id, respond_with_file, respond_404 +from ._base import parse_media_id, respond_404 from twisted.web.resource import Resource from synapse.http.server import request_handler, set_cors_headers @@ -59,35 +59,14 @@ class DownloadResource(Resource): if server_name == self.server_name: yield self.media_repo.get_local_media(request, media_id, name) else: - yield self._respond_remote_file( - request, server_name, media_id, name - ) + allow_remote = synapse.http.servlet.parse_boolean( + request, "allow_remote", default=True) + if not allow_remote: + logger.info( + "Rejecting request for remote media %s/%s due to allow_remote", + server_name, media_id, + ) + respond_404(request) + return - @defer.inlineCallbacks - def _respond_remote_file(self, request, server_name, media_id, name): - # don't forward requests for remote media if allow_remote is false - allow_remote = synapse.http.servlet.parse_boolean( - request, "allow_remote", default=True) - if not allow_remote: - logger.info( - "Rejecting request for remote media %s/%s due to allow_remote", - server_name, media_id, - ) - respond_404(request) - return - - media_info = yield self.media_repo.get_remote_media(server_name, media_id) - - media_type = media_info["media_type"] - media_length = media_info["media_length"] - filesystem_id = media_info["filesystem_id"] - upload_name = name if name else media_info["upload_name"] - - file_path = self.filepaths.remote_media_filepath( - server_name, filesystem_id - ) - - yield respond_with_file( - request, media_type, file_path, media_length, - upload_name=upload_name, - ) + yield self.media_repo.get_remote_media(request, server_name, media_id, name) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6ad9320b69..07820fab62 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -19,7 +19,7 @@ import twisted.internet.error import twisted.web.http from 
twisted.web.resource import Resource -from ._base import respond_404, RequestWriter, FileInfo, respond_with_responder +from ._base import respond_404, FileInfo, respond_with_responder from .upload_resource import UploadResource from .download_resource import DownloadResource from .thumbnail_resource import ThumbnailResource @@ -161,124 +161,165 @@ class MediaRepository(object): ) @defer.inlineCallbacks - def get_remote_media(self, server_name, media_id): + def get_remote_media(self, request, server_name, media_id, name): + """Respond to requests for remote media. + """ + self.recently_accessed_remotes.add((server_name, media_id)) + + # We linearize here to ensure that we don't try and download remote + # media mutliple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): - media_info = yield self._get_remote_media_impl(server_name, media_id) - defer.returnValue(media_info) + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # We purposefully stream the file outside the lock + if responder: + media_type = media_info["media_type"] + media_length = media_info["media_length"] + upload_name = name if name else media_info["upload_name"] + yield respond_with_responder( + request, responder, media_type, media_length, upload_name, + ) + else: + respond_404(request) @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): + """Looks for media in local cache, if not there then attempt to + download from remote server. + + Returns: + Deferred((Respodner, media_info)) + """ media_info = yield self.store.get_cached_remote_media( server_name, media_id ) - if not media_info: - media_info = yield self._download_remote_file( - server_name, media_id - ) - elif media_info["quarantined_by"]: - raise NotFoundError() + + # file_id is the ID we use to track the file locally. If we've already + # seen the file then reuse the existing ID, otherwise genereate a new + # one. + if media_info: + file_id = media_info["filesystem_id"] else: - self.recently_accessed_remotes.add((server_name, media_id)) - yield self.store.update_cached_last_access_time( - [(server_name, media_id)], self.clock.time_msec() - ) - defer.returnValue(media_info) + file_id = random_string(24) + + file_info = FileInfo(server_name, file_id) + + # If we have an entry in the DB, try and look for it + if media_info: + if media_info["quarantined_by"]: + raise NotFoundError() + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + # Failed to find the file anywhere, lets download it. + + media_info = yield self._download_remote_file( + server_name, media_id, file_id + ) + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + defer.returnValue((responder, media_info)) + + defer.returnValue((None, media_info)) @defer.inlineCallbacks - def _download_remote_file(self, server_name, media_id): - file_id = random_string(24) + def _download_remote_file(self, server_name, media_id, file_id): + """Attempt to download the remote file from the given server name, + using the given file_id as the local id. 
+ """ - fpath = self.filepaths.remote_media_filepath_rel( - server_name, file_id + file_info = FileInfo( + server_name=server_name, + file_id=file_id, ) - fname = os.path.join(self.primary_base_path, fpath) - self._makedirs(fname) - try: - with open(fname, "wb") as f: - request_path = "/".join(( - "/_matrix/media/v1/download", server_name, media_id, - )) + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + request_path = "/".join(( + "/_matrix/media/v1/download", server_name, media_id, + )) + try: + length, headers = yield self.client.get_file( + server_name, request_path, output_stream=f, + max_size=self.max_upload_size, args={ + # tell the remote server to 404 if it doesn't + # recognise the server_name, to make sure we don't + # end up with a routing loop. + "allow_remote": "false", + } + ) + except twisted.internet.error.DNSLookupError as e: + logger.warn("HTTP error fetching remote media %s/%s: %r", + server_name, media_id, e) + raise NotFoundError() + + except HttpResponseException as e: + logger.warn("HTTP error fetching remote media %s/%s: %s", + server_name, media_id, e.response) + if e.code == twisted.web.http.NOT_FOUND: + raise SynapseError.from_http_response_exception(e) + raise SynapseError(502, "Failed to fetch remote media") + + except SynapseError: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise + except NotRetryingDestination: + logger.warn("Not retrying destination %r", server_name) + raise SynapseError(502, "Failed to fetch remote media") + except Exception: + logger.exception("Failed to fetch remote media %s/%s", + server_name, media_id) + raise SynapseError(502, "Failed to fetch remote media") + + yield finish() + + media_type = headers["Content-Type"][0] + + time_now_ms = self.clock.time_msec() + + content_disposition = headers.get("Content-Disposition", None) + if content_disposition: + _, params = cgi.parse_header(content_disposition[0],) + upload_name = None + + # First check if there is a valid UTF-8 filename + upload_name_utf8 = params.get("filename*", None) + if upload_name_utf8: + if upload_name_utf8.lower().startswith("utf-8''"): + upload_name = upload_name_utf8[7:] + + # If there isn't check for an ascii name. + if not upload_name: + upload_name_ascii = params.get("filename", None) + if upload_name_ascii and is_ascii(upload_name_ascii): + upload_name = upload_name_ascii + + if upload_name: + upload_name = urlparse.unquote(upload_name) try: - length, headers = yield self.client.get_file( - server_name, request_path, output_stream=f, - max_size=self.max_upload_size, args={ - # tell the remote server to 404 if it doesn't - # recognise the server_name, to make sure we don't - # end up with a routing loop. 
- "allow_remote": "false", - } - ) - except twisted.internet.error.DNSLookupError as e: - logger.warn("HTTP error fetching remote media %s/%s: %r", - server_name, media_id, e) - raise NotFoundError() + upload_name = upload_name.decode("utf-8") + except UnicodeDecodeError: + upload_name = None + else: + upload_name = None - except HttpResponseException as e: - logger.warn("HTTP error fetching remote media %s/%s: %s", - server_name, media_id, e.response) - if e.code == twisted.web.http.NOT_FOUND: - raise SynapseError.from_http_response_exception(e) - raise SynapseError(502, "Failed to fetch remote media") + logger.info("Stored remote media in file %r", fname) - except SynapseError: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise - except NotRetryingDestination: - logger.warn("Not retrying destination %r", server_name) - raise SynapseError(502, "Failed to fetch remote media") - except Exception: - logger.exception("Failed to fetch remote media %s/%s", - server_name, media_id) - raise SynapseError(502, "Failed to fetch remote media") - - yield self.copy_to_backup(fpath) - - media_type = headers["Content-Type"][0] - time_now_ms = self.clock.time_msec() - - content_disposition = headers.get("Content-Disposition", None) - if content_disposition: - _, params = cgi.parse_header(content_disposition[0],) - upload_name = None - - # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) - if upload_name_utf8: - if upload_name_utf8.lower().startswith("utf-8''"): - upload_name = upload_name_utf8[7:] - - # If there isn't check for an ascii name. - if not upload_name: - upload_name_ascii = params.get("filename", None) - if upload_name_ascii and is_ascii(upload_name_ascii): - upload_name = upload_name_ascii - - if upload_name: - upload_name = urlparse.unquote(upload_name) - try: - upload_name = upload_name.decode("utf-8") - except UnicodeDecodeError: - upload_name = None - else: - upload_name = None - - logger.info("Stored remote media in file %r", fname) - - yield self.store.store_cached_remote_media( - origin=server_name, - media_id=media_id, - media_type=media_type, - time_now_ms=self.clock.time_msec(), - upload_name=upload_name, - media_length=length, - filesystem_id=file_id, - ) - except Exception: - os.remove(fname) - raise + yield self.store.store_cached_remote_media( + origin=server_name, + media_id=media_id, + media_type=media_type, + time_now_ms=self.clock.time_msec(), + upload_name=upload_name, + media_length=length, + filesystem_id=file_id, + ) media_info = { "media_type": media_type, From 9d30a7691c7a54bf2d299ea3cdbdf4af74dd0af5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 11:08:46 +0000 Subject: [PATCH 028/348] Make ThumbnailResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +- synapse/rest/media/v1/thumbnail_resource.py | 110 ++++++++++++-------- 2 files changed, 67 insertions(+), 47 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 07820fab62..0c84f1be07 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -637,7 +637,9 @@ class MediaRepositoryResource(Resource): self.putChild("upload", UploadResource(hs, media_repo)) self.putChild("download", DownloadResource(hs, media_repo)) - self.putChild("thumbnail", ThumbnailResource(hs, media_repo)) + self.putChild("thumbnail", ThumbnailResource( + hs, media_repo, media_repo.media_storage, + )) 
self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c9..f59f300665 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -14,7 +14,10 @@ # limitations under the License. -from ._base import parse_media_id, respond_404, respond_with_file +from ._base import ( + parse_media_id, respond_404, respond_with_file, FileInfo, + respond_with_responder, +) from twisted.web.resource import Resource from synapse.http.servlet import parse_string, parse_integer from synapse.http.server import request_handler, set_cors_headers @@ -30,12 +33,12 @@ logger = logging.getLogger(__name__) class ThumbnailResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.store = hs.get_datastore() - self.filepaths = media_repo.filepaths self.media_repo = media_repo + self.media_storage = media_storage self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.server_name = hs.hostname self.version_string = hs.version_string @@ -91,23 +94,22 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path) + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + t_type = file_info.thumbnail_type + t_length = thumbnail_info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) @@ -129,20 +131,23 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - if media_info["url_cache"]: - # TODO: Check the file still exists, if it doesn't we can redownload - # it from the url `media_info["url_cache"]` - file_path = self.filepaths.url_cache_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - else: - file_path = self.filepaths.local_media_thumbnail( - media_id, desired_width, desired_height, desired_type, - desired_method, - ) - yield respond_with_file(request, desired_type, file_path) - return + file_info = FileInfo( + server_name=None, file_id=media_id, + url_cache=media_info["url_cache"], + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], + ) + + t_type = 
file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -175,12 +180,22 @@ class ThumbnailResource(Resource): t_type = info["thumbnail_type"] == desired_type if t_w and t_h and t_method and t_type: - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, desired_width, desired_height, - desired_type, desired_method, + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=info["thumbnail_width"], + thumbnail_height=info["thumbnail_height"], + thumbnail_type=info["thumbnail_type"], + thumbnail_method=info["thumbnail_method"], ) - yield respond_with_file(request, desired_type, file_path) - return + + t_type = file_info.thumbnail_type + t_length = info["thumbnail_length"] + + responder = yield self.media_storage.fetch_media(file_info) + if responder: + yield respond_with_responder(request, responder, t_type, t_length) + return logger.debug("We don't have a local thumbnail of that size. Generating") @@ -206,17 +221,20 @@ class ThumbnailResource(Resource): thumbnail_info = self._select_thumbnail( width, height, method, m_type, thumbnail_infos ) - t_width = thumbnail_info["thumbnail_width"] - t_height = thumbnail_info["thumbnail_height"] - t_type = thumbnail_info["thumbnail_type"] - t_method = thumbnail_info["thumbnail_method"] - file_id = thumbnail_info["filesystem_id"] + file_info = FileInfo( + server_name=None, file_id=media_id, + thumbnail=True, + thumbnail_width=thumbnail_info["thumbnail_width"], + thumbnail_height=thumbnail_info["thumbnail_height"], + thumbnail_type=thumbnail_info["thumbnail_type"], + thumbnail_method=thumbnail_info["thumbnail_method"], + ) + + t_type = file_info.thumbnail_type t_length = thumbnail_info["thumbnail_length"] - file_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method, - ) - yield respond_with_file(request, t_type, file_path, t_length) + responder = yield self.media_storage.fetch_media(file_info) + yield respond_with_responder(request, responder, t_type, t_length) else: respond_404(request) From 2442e9876c2622e345ab62a414e509d4f5cecb4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jan 2018 14:36:07 +0000 Subject: [PATCH 029/348] Make PreviewUrlResource use MediaStorage --- synapse/rest/media/v1/media_repository.py | 4 +++- synapse/rest/media/v1/preview_url_resource.py | 18 +++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0c84f1be07..9aba9f13f0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -642,4 +642,6 @@ class MediaRepositoryResource(Resource): )) self.putChild("identicon", IdenticonResource()) if hs.config.url_preview_enabled: - self.putChild("preview_url", PreviewUrlResource(hs, media_repo)) + self.putChild("preview_url", PreviewUrlResource( + hs, media_repo, media_repo.media_storage, + )) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 40d2e664eb..f3dbbb3fec 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -17,6 +17,8 @@ from twisted.web.server import NOT_DONE_YET from twisted.internet 
import defer from twisted.web.resource import Resource +from ._base import FileInfo + from synapse.api.errors import ( SynapseError, Codes, ) @@ -49,7 +51,7 @@ logger = logging.getLogger(__name__) class PreviewUrlResource(Resource): isLeaf = True - def __init__(self, hs, media_repo): + def __init__(self, hs, media_repo, media_storage): Resource.__init__(self) self.auth = hs.get_auth() @@ -62,6 +64,7 @@ class PreviewUrlResource(Resource): self.client = SpiderHttpClient(hs) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path + self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -273,19 +276,21 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fpath = self.filepaths.url_cache_filepath_rel(file_id) - fname = os.path.join(self.primary_base_path, fpath) - self.media_repo._makedirs(fname) + file_info = FileInfo( + server_name=None, + file_id=file_id, + url_cache=True, + ) try: - with open(fname, "wb") as f: + with self.media_storage.store_into_file(file_info) as (f, fname, finish): logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) # FIXME: pass through 404s and other error messages nicely - yield self.media_repo.copy_to_backup(fpath) + yield finish() media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -327,7 +332,6 @@ class PreviewUrlResource(Resource): ) except Exception as e: - os.remove(fname) raise SynapseError( 500, ("Failed to download content: %s" % e), Codes.UNKNOWN From 8f03aa9f61e1d99dfcde972f4e5f0f52919db49f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jan 2018 17:19:55 +0000 Subject: [PATCH 030/348] Add StorageProvider concept --- synapse/rest/media/v1/media_repository.py | 29 ++++- synapse/rest/media/v1/media_storage.py | 15 ++- synapse/rest/media/v1/storage_provider.py | 127 ++++++++++++++++++++++ 3 files changed, 162 insertions(+), 9 deletions(-) create mode 100644 synapse/rest/media/v1/storage_provider.py diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 9aba9f13f0..7938fe7bc8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,6 +27,9 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer +from .storage_provider import ( + StorageProviderWrapper, FileStorageProviderBackend, +) from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -66,10 +69,6 @@ class MediaRepository(object): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = hs.config.backup_media_store_path - - self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store - self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -77,7 +76,27 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - self.media_storage = MediaStorage(self.primary_base_path, self.filepaths) + # List of StorageProvider's where we should search for media and + # potentially upload to. 
+ self.storage_providers = [] + + # TODO: Move this into config and allow other storage providers to be + # defined. + if hs.config.backup_media_store_path: + backend = FileStorageProviderBackend( + self.primary_base_path, hs.config.backup_media_store_path, + ) + provider = StorageProviderWrapper( + backend, + store=True, + store_synchronous=hs.config.synchronous_backup_media_store, + store_remote=True, + ) + self.storage_providers.append(provider) + + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, self.storage_providers, + ) self.clock.looping_call( self._update_recently_accessed_remotes, diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 052745e6f6..a1ec6cadb1 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -32,9 +32,10 @@ class MediaStorage(object): """Responsible for storing/fetching files from local sources. """ - def __init__(self, local_media_directory, filepaths): + def __init__(self, local_media_directory, filepaths, storage_providers): self.local_media_directory = local_media_directory self.filepaths = filepaths + self.storage_providers = storage_providers @defer.inlineCallbacks def store_file(self, source, file_info): @@ -90,11 +91,12 @@ class MediaStorage(object): finished_called = [False] + @defer.inlineCallbacks def finish(): - # This will be used later when we want to hit out to other storage - # places + for provider in self.storage_providers: + yield provider.store_file(path, file_info) + finished_called[0] = True - return defer.succeed(None) try: with open(fname, "wb") as f: @@ -127,6 +129,11 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(FileResponder(open(local_path, "rb"))) + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + defer.returnValue(res) + defer.returnValue(None) def _file_info_to_path(self, file_info): diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py new file mode 100644 index 0000000000..2ad602e101 --- /dev/null +++ b/synapse/rest/media/v1/storage_provider.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer, threads + +from .media_storage import FileResponder + +from synapse.util.logcontext import preserve_fn + +import logging +import os +import shutil + + +logger = logging.getLogger(__name__) + + +class StorageProvider(object): + """A storage provider is a service that can store uploaded media and + retrieve them. + """ + def store_file(self, path, file_info): + """Store the file described by file_info. The actual contents can be + retrieved by reading the file in file_info.upload_path. 
+ + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred + """ + pass + + def fetch(self, path, file_info): + """Attempt to fetch the file described by file_info and stream it + into writer. + + Args: + path (str): Relative path of file in local cache + file_info (FileInfo) + + Returns: + Deferred(Responder): Returns a Responder if the provider has the file, + otherwise returns None. + """ + pass + + +class StorageProviderWrapper(StorageProvider): + """Wraps a storage provider and provides various config options + + Args: + backend (StorageProvider) + store (bool): Whether to store new files or not. + store_synchronous (bool): Whether to wait for file to be successfully + uploaded, or todo the upload in the backgroud. + store_remote (bool): Whether remote media should be uploaded + """ + def __init__(self, backend, store, store_synchronous, store_remote): + self.backend = backend + self.store = store + self.store_synchronous = store_synchronous + self.store_remote = store_remote + + def store_file(self, path, file_info): + if not self.store: + return defer.succeed(None) + + if file_info.server_name and not self.store_remote: + return defer.succeed(None) + + if self.store_synchronous: + return self.backend.store_file(path, file_info) + else: + # TODO: Handle errors. + preserve_fn(self.backend.store_file)(path, file_info) + return defer.succeed(None) + + def fetch(self, path, file_info): + return self.backend.fetch(path, file_info) + + +class FileStorageProviderBackend(StorageProvider): + """A storage provider that stores files in a directory on a filesystem. + + Args: + cache_directory (str): Base path of the local media repository + base_directory (str): Base path to store new files + """ + + def __init__(self, cache_directory, base_directory): + self.cache_directory = cache_directory + self.base_directory = base_directory + + def store_file(self, path, file_info): + """See StorageProvider.store_file""" + + primary_fname = os.path.join(self.cache_directory, path) + backup_fname = os.path.join(self.base_directory, path) + + dirname = os.path.dirname(backup_fname) + if not os.path.exists(dirname): + os.makedirs(dirname) + + return threads.deferToThread( + shutil.copyfile, primary_fname, backup_fname, + ) + + def fetch(self, path, file_info): + """See StorageProvider.fetch""" + + backup_fname = os.path.join(self.base_directory, path) + if os.path.isfile(backup_fname): + return FileResponder(open(backup_fname, "rb")) From 19f9227643b5099666878de33453bbe361f216fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:25:04 +0000 Subject: [PATCH 031/348] avoid 80s GIN inserts by tweaking work_mem see https://github.com/matrix-org/synapse/issues/2753 for details --- synapse/storage/search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c636..7b1166f417 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,6 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" @@ -123,6 +124,9 @@ class SearchStore(BackgroundUpdateStore): clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] txn.executemany(sql, clump) + if isinstance(self.database_engine, PostgresEngine): + 
txn.execute("RESET work_mem") + progress = { "target_min_stream_id_inclusive": target_min_stream_id, "max_stream_id_exclusive": min_stream_id, From e365ad329f3c7e12bb2126217acbc62bdf0b9aec Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:30:30 +0000 Subject: [PATCH 032/348] oops, tweak work_mem when actually storing --- synapse/storage/room.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..9e2bf1ab48 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,6 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256KB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" @@ -323,6 +324,7 @@ class RoomStore(SQLBaseStore): event.origin_server_ts, ) ) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From e79db0a673ef79bfa30e435bf64b5c3b75ed98d9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:37:48 +0000 Subject: [PATCH 033/348] switch back from GIST to GIN indexes --- synapse/storage/search.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c636..ba7141563e 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -31,7 +31,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" - EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist" + EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): super(SearchStore, self).__init__(db_conn, hs) @@ -43,8 +43,8 @@ class SearchStore(BackgroundUpdateStore): self._background_reindex_search_order ) self.register_background_update_handler( - self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME, - self._background_reindex_gist_search + self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, + self._background_reindex_gin_search ) @defer.inlineCallbacks @@ -145,25 +145,30 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(result) @defer.inlineCallbacks - def _background_reindex_gist_search(self, progress, batch_size): + def _background_reindex_gin_search(self, progress, batch_size): + '''This handles old synapses which used GIST indexes; converting them + back to be GIN as per the actual schema. 
Otherwise it crashes out + as a NOOP + ''' + def create_index(conn): conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx_gist" - " ON event_search USING GIST (vector)" + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" ) - c.execute("DROP INDEX event_search_fts_idx") + c.execute("DROP INDEX event_search_fts_idx_gist") conn.set_session(autocommit=False) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) - yield self._end_background_update(self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME) + yield self._end_background_update(self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME) defer.returnValue(1) @defer.inlineCallbacks From a66f489678dc05fa89e6849405c37a9a390e62fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 9 Jan 2018 16:55:51 +0000 Subject: [PATCH 034/348] fix GIST->GIN switch --- .../schema/delta/38/postgres_fts_gist.sql | 6 ++-- .../schema/delta/46/postgres_fts_gin.sql | 17 +++++++++++ synapse/storage/search.py | 28 +++++++++++-------- 3 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 synapse/storage/schema/delta/46/postgres_fts_gin.sql diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/schema/delta/38/postgres_fts_gist.sql index f090a7b75a..5fe27d6877 100644 --- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql +++ b/synapse/storage/schema/delta/38/postgres_fts_gist.sql @@ -13,5 +13,7 @@ * limitations under the License. */ - INSERT into background_updates (update_name, progress_json) - VALUES ('event_search_postgres_gist', '{}'); +-- We no longer do this given we back it out again in schema 46 + +-- INSERT into background_updates (update_name, progress_json) +-- VALUES ('event_search_postgres_gist', '{}'); diff --git a/synapse/storage/schema/delta/46/postgres_fts_gin.sql b/synapse/storage/schema/delta/46/postgres_fts_gin.sql new file mode 100644 index 0000000000..31d7a817eb --- /dev/null +++ b/synapse/storage/schema/delta/46/postgres_fts_gin.sql @@ -0,0 +1,17 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_search_postgres_gin', '{}'); diff --git a/synapse/storage/search.py b/synapse/storage/search.py index ba7141563e..d3e76b58d6 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -146,24 +146,28 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_gin_search(self, progress, batch_size): - '''This handles old synapses which used GIST indexes; converting them - back to be GIN as per the actual schema. Otherwise it crashes out - as a NOOP + '''This handles old synapses which used GIST indexes, if any; + converting them back to be GIN as per the actual schema. 
''' def create_index(conn): - conn.rollback() - conn.set_session(autocommit=True) - c = conn.cursor() + try: + conn.rollback() + conn.set_session(autocommit=True) + c = conn.cursor() - c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx" - " ON event_search USING GIN (vector)" - ) + c.execute( + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" + ) - c.execute("DROP INDEX event_search_fts_idx_gist") + c.execute("DROP INDEX event_search_fts_idx_gist") - conn.set_session(autocommit=False) + conn.set_session(autocommit=False) + except e: + logger.warn( + "Ignoring error %s when trying to switch from GIST to GIN" % (e,) + ) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) From 174eacc8ba71015003a78594ebc89cbe45d8384a Mon Sep 17 00:00:00 2001 From: hera Date: Tue, 9 Jan 2018 18:06:30 +0000 Subject: [PATCH 035/348] oops --- synapse/storage/room.py | 2 +- synapse/storage/search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9e2bf1ab48..0604f8f270 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -310,7 +310,7 @@ class RoomStore(SQLBaseStore): def _store_event_search_txn(self, txn, event, key, value): if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 7b1166f417..f52f3c8592 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -106,7 +106,7 @@ class SearchStore(BackgroundUpdateStore): event_search_rows.append((event_id, room_id, key, value)) if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256KB'") + txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search (event_id, room_id, key, vector)" " VALUES (?,?,?,to_tsvector('english', ?))" From d90e8ea444092f500c721567fe055d48eb110528 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 9 Jan 2018 18:27:35 +0000 Subject: [PATCH 036/348] Update http request metrics before calling servlet Make sure that we set the servlet name in the metrics object *before* calling the servlet, in case the servlet throws an exception. --- synapse/http/server.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 25466cd292..6e8f4c9c5f 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -107,6 +107,10 @@ def wrap_request_handler(request_handler, include_metrics=False): with LoggingContext(request_id) as request_context: with Measure(self.clock, "wrapped_request_handler"): request_metrics = RequestMetrics() + # we start the request metrics timer here with an initial stab + # at the servlet name. For most requests that name will be + # JsonResource (or a subclass), and JsonResource._async_render + # will update it once it picks a servlet. request_metrics.start(self.clock, name=self.__class__.__name__) request_context.request = request_id @@ -249,12 +253,23 @@ class JsonResource(HttpServer, resource.Resource): if not m: continue - # We found a match! Trigger callback and then return the - # returned response. We pass both the request and any - # matched groups from the regex to the callback. + # We found a match! 
First update the metrics object to indicate + # which servlet is handling the request. callback = path_entry.callback + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + + request_metrics.name = servlet_classname + + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. + kwargs = intern_dict({ name: urllib.unquote(value).decode("UTF-8") if value else value for name, value in m.groupdict().items() @@ -265,17 +280,10 @@ class JsonResource(HttpServer, resource.Resource): code, response = callback_return self._send_response(request, code, response) - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback - - request_metrics.name = servlet_classname - return # Huh. No one wanted to handle that? Fiiiiiine. Send 400. + request_metrics.name = self.__class__.__name__ + ".UnrecognizedRequest" raise UnrecognizedRequestError() def _send_response(self, request, code, response_json_object, From 8c9d5b4873dc5e54c1383e3d31060fd32b08fae1 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 10 Jan 2018 15:04:02 +0000 Subject: [PATCH 037/348] Fix publicised groups API (singular) over federation which was missing its fed client API, since there is no other API it might as well reuse the bulk one and unwrap it Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- synapse/handlers/groups_local.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 7e5d3f148d..0d3feacd47 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -383,11 +383,11 @@ class GroupsLocalHandler(object): defer.returnValue({"groups": result}) else: - result = yield self.transport_client.get_publicised_groups_for_user( - get_domain_from_id(user_id), user_id - ) + result = yield self.transport_client.bulk_get_publicised_groups( + get_domain_from_id(user_id), [user_id], + )["users"][user_id] # TODO: Verify attestations - defer.returnValue(result) + defer.returnValue({"groups": result}) @defer.inlineCallbacks def bulk_get_publicised_groups(self, user_ids, proxy=True): From 5f07f5694c9a0535e64262c3726007e966ba72ea Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 10 Jan 2018 15:11:35 +0000 Subject: [PATCH 038/348] fix order of operations derp and also use `.get` to default to {} Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- synapse/handlers/groups_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py index 0d3feacd47..e4d0cc8b02 100644 --- a/synapse/handlers/groups_local.py +++ b/synapse/handlers/groups_local.py @@ -383,9 +383,10 @@ class GroupsLocalHandler(object): defer.returnValue({"groups": result}) else: - result = yield self.transport_client.bulk_get_publicised_groups( + bulk_result = yield self.transport_client.bulk_get_publicised_groups( get_domain_from_id(user_id), [user_id], - )["users"][user_id] + ) + result = bulk_result.get("users", {}).get(user_id) # TODO: Verify attestations defer.returnValue({"groups": result}) From 
f8e1ab5fee708bea93c54b39dc802a08c7ed7e21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 10 Jan 2018 18:01:28 +0000 Subject: [PATCH 039/348] Do bcrypt hashing in a background thread --- synapse/handlers/auth.py | 24 ++++++++++++++++-------- synapse/handlers/register.py | 2 +- synapse/handlers/set_password.py | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2f30f183ce..af9060fdbf 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, threads from ._base import BaseHandler from synapse.api.constants import LoginType @@ -22,6 +22,7 @@ from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util.async import run_on_reactor from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.logcontext import make_deferred_yieldable from twisted.web.client import PartialDownloadError @@ -626,7 +627,7 @@ class AuthHandler(BaseHandler): if not lookupres: defer.returnValue(None) (user_id, password_hash) = lookupres - result = self.validate_hash(password, password_hash) + result = yield self.validate_hash(password, password_hash) if not result: logger.warn("Failed password login for user %s", user_id) defer.returnValue(None) @@ -754,10 +755,13 @@ class AuthHandler(BaseHandler): password (str): Password to hash. Returns: - Hashed password (str). + Deferred(str): Hashed password. """ - return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, - bcrypt.gensalt(self.bcrypt_rounds)) + def _do_hash(): + return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, + bcrypt.gensalt(self.bcrypt_rounds)) + + return make_deferred_yieldable(threads.deferToThread(_do_hash)) def validate_hash(self, password, stored_hash): """Validates that self.hash(password) == stored_hash. @@ -767,13 +771,17 @@ class AuthHandler(BaseHandler): stored_hash (str): Expected hash value. Returns: - Whether self.hash(password) == stored_hash (bool). + Deferred(bool): Whether self.hash(password) == stored_hash. 
""" - if stored_hash: + + def _do_validate_hash(): return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, stored_hash.encode('utf8')) == stored_hash + + if stored_hash: + return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) else: - return False + return defer.succeed(False) class MacaroonGeneartor(object): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index f6e7e58563..132e2052c0 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -130,7 +130,7 @@ class RegistrationHandler(BaseHandler): yield run_on_reactor() password_hash = None if password: - password_hash = self.auth_handler().hash(password) + password_hash = yield self.auth_handler().hash(password) if localpart: yield self.check_username(localpart, guest_access_token=guest_access_token) diff --git a/synapse/handlers/set_password.py b/synapse/handlers/set_password.py index 44414e1dc1..e057ae54c9 100644 --- a/synapse/handlers/set_password.py +++ b/synapse/handlers/set_password.py @@ -31,7 +31,7 @@ class SetPasswordHandler(BaseHandler): @defer.inlineCallbacks def set_password(self, user_id, newpassword, requester=None): - password_hash = self._auth_handler.hash(newpassword) + password_hash = yield self._auth_handler.hash(newpassword) except_device_id = requester.device_id if requester else None except_access_token_id = requester.access_token_id if requester else None From 7cd34512d850fce57bdf1dcfb3f61d5315baf639 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 11 Jan 2018 11:15:20 +0000 Subject: [PATCH 040/348] When using synctl with workers, don't start the main synapse automatically --- CHANGES.rst | 7 +++++++ synapse/app/synctl.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dcf9adc95c..24e4e7a384 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,10 @@ +Unreleased +========== + +synctl no longer starts the main synapse when using ``-a`` option with workers. +A new worker file should be added with ``worker_app: synapse.app.homeserver`` + + Changes in synapse v0.26.0 (2018-01-05) ======================================= diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 3bd7ef7bba..049956746e 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -184,6 +184,9 @@ def main(): worker_configfiles.append(worker_configfile) if options.all_processes: + # To start the main synapse with -a you need to add a worker file + # with worker_app == "synapse.app.homeserver" + start_stop_synapse = False worker_configdir = options.all_processes if not os.path.isdir(worker_configdir): write( @@ -200,14 +203,29 @@ def main(): with open(worker_configfile) as stream: worker_config = yaml.load(stream) worker_app = worker_config["worker_app"] - worker_pidfile = worker_config["worker_pid_file"] - worker_daemonize = worker_config["worker_daemonize"] - assert worker_daemonize, "In config %r: expected '%s' to be True" % ( - worker_configfile, "worker_daemonize") - worker_cache_factor = worker_config.get("synctl_cache_factor") - workers.append(Worker( - worker_app, worker_configfile, worker_pidfile, worker_cache_factor, - )) + if worker_app == "synapse.app.homeserver": + # We need to special case all of this to pick up options that may + # be set in the main config file or in this worker config file. 
+ worker_pidfile = ( + worker_config.get("worker_pid_file") + or worker_config("pid_file") + or pidfile + ) + workers.append(Worker( + "synapse.app.homeserver", + worker_configfile, + worker_pidfile, + worker_config.get("synctl_cache_factor") or cache_factor, + )) + else: + worker_pidfile = worker_config["worker_pid_file"] + worker_daemonize = worker_config["worker_daemonize"] + assert worker_daemonize, "In config %r: expected '%s' to be True" % ( + worker_configfile, "worker_daemonize") + worker_cache_factor = worker_config.get("synctl_cache_factor") + workers.append(Worker( + worker_app, worker_configfile, worker_pidfile, worker_cache_factor, + )) action = options.action From f68e4cf690df9bbc70540804fd8cc590d40c2149 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 10:11:12 +0000 Subject: [PATCH 041/348] Refactor --- synapse/app/synctl.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 049956746e..4bd8f735ff 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -211,21 +211,16 @@ def main(): or worker_config("pid_file") or pidfile ) - workers.append(Worker( - "synapse.app.homeserver", - worker_configfile, - worker_pidfile, - worker_config.get("synctl_cache_factor") or cache_factor, - )) + worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor else: worker_pidfile = worker_config["worker_pid_file"] worker_daemonize = worker_config["worker_daemonize"] assert worker_daemonize, "In config %r: expected '%s' to be True" % ( worker_configfile, "worker_daemonize") worker_cache_factor = worker_config.get("synctl_cache_factor") - workers.append(Worker( - worker_app, worker_configfile, worker_pidfile, worker_cache_factor, - )) + workers.append(Worker( + worker_app, worker_configfile, worker_pidfile, worker_cache_factor, + )) action = options.action From f4d93ae424bec48f7d0d68d885942dc83d5780d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 10:39:27 +0000 Subject: [PATCH 042/348] Actually make it work --- synapse/app/synctl.py | 12 ++++++++++-- synapse/config/workers.py | 5 +++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 4bd8f735ff..0f0ddfa78a 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -207,11 +207,19 @@ def main(): # We need to special case all of this to pick up options that may # be set in the main config file or in this worker config file. worker_pidfile = ( - worker_config.get("worker_pid_file") - or worker_config("pid_file") + worker_config.get("pid_file") or pidfile ) worker_cache_factor = worker_config.get("synctl_cache_factor") or cache_factor + daemonize = worker_config.get("daemonize") or config.get("daemonize") + assert daemonize, "Main process must have daemonize set to true" + + # The master process doesn't support using worker_* config. 
+ for key in worker_config: + if key == "worker_app": # But we allow worker_app + continue + assert not key.startswith("worker_"), \ + "Main process cannot use worker_* config" else: worker_pidfile = worker_config["worker_pid_file"] worker_daemonize = worker_config["worker_daemonize"] diff --git a/synapse/config/workers.py b/synapse/config/workers.py index c5a5a8919c..4b6884918d 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -23,6 +23,11 @@ class WorkerConfig(Config): def read_config(self, config): self.worker_app = config.get("worker_app") + + # Canonicalise worker_app so that master always has None + if self.worker_app == "synapse.app.homeserver": + self.worker_app = None + self.worker_listeners = config.get("worker_listeners") self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") From 227c491510e09bc201b835a7dfa84aaeafb3cdc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:15:31 +0000 Subject: [PATCH 043/348] Comments --- synapse/rest/media/v1/_base.py | 9 +++--- synapse/rest/media/v1/media_repository.py | 39 ++++++++++++++++++++--- synapse/rest/media/v1/media_storage.py | 32 ++++++++++++++++--- 3 files changed, 65 insertions(+), 15 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 1310820486..03df875b44 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -134,9 +134,9 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam Args: request (twisted.web.http.Request) - responder (Responder) + responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media, if known. + file_size (int): Size in bytes of the media. If not known it should be None upload_name (str): The name of the requested file, if any. """ if not responder: @@ -179,13 +179,12 @@ class FileInfo(object): or None if local. file_id (str): The local ID of the file. For local files this is the same as the media_id - media_type (str): Type of the file url_cache (bool): If the file is for the url preview cache thumbnail (bool): Whether the file is a thumbnail or not. thumbnail_width (int) thumbnail_height (int) - thumbnail_method (int) - thumbnail_type (str) + thumbnail_method (str) + thumbnail_type (str): Content type of thumbnail, e.g. image/png """ def __init__(self, server_name, file_id, url_cache=False, thumbnail=False, thumbnail_width=None, thumbnail_height=None, diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 7938fe7bc8..6508dbf178 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -76,7 +76,7 @@ class MediaRepository(object): self.recently_accessed_remotes = set() - # List of StorageProvider's where we should search for media and + # List of StorageProviders where we should search for media and # potentially upload to. self.storage_providers = [] @@ -158,6 +158,16 @@ class MediaRepository(object): @defer.inlineCallbacks def get_local_media(self, request, media_id, name): """Responds to reqests for local media, if exists, or returns 404. 
+ + Args: + request(twisted.web.http.Request) + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: @@ -182,18 +192,29 @@ class MediaRepository(object): @defer.inlineCallbacks def get_remote_media(self, request, server_name, media_id, name): """Respond to requests for remote media. + + Args: + request(twisted.web.http.Request) + server_name (str): Remote server_name where the media originated. + media_id (str) + name (str|None): Optional name that, if specified, will be used as + the filename in the Content-Disposition header of the response. + + Retruns: + Deferred: Resolves once a response has successfully been written + to request """ self.recently_accessed_remotes.add((server_name, media_id)) # We linearize here to ensure that we don't try and download remote - # media mutliple times concurrently + # media multiple times concurrently key = (server_name, media_id) with (yield self.remote_media_linearizer.queue(key)): responder, media_info = yield self._get_remote_media_impl( server_name, media_id, ) - # We purposefully stream the file outside the lock + # We deliberately stream the file outside the lock if responder: media_type = media_info["media_type"] media_length = media_info["media_length"] @@ -210,7 +231,7 @@ class MediaRepository(object): download from remote server. Returns: - Deferred((Respodner, media_info)) + Deferred[(Responder, media_info)] """ media_info = yield self.store.get_cached_remote_media( server_name, media_id @@ -251,6 +272,14 @@ class MediaRepository(object): def _download_remote_file(self, server_name, media_id, file_id): """Attempt to download the remote file from the given server name, using the given file_id as the local id. + + Args: + server_name (str): Originating server + media_id (str) + file_id (str): Local file ID + + Returns: + Deferred[MediaInfo] """ file_info = FileInfo( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index a1ec6cadb1..49d2b7cd45 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -30,6 +30,12 @@ logger = logging.getLogger(__name__) class MediaStorage(object): """Responsible for storing/fetching files from local sources. + + Args: + local_media_directory (str): Base path where we store media on disk + filepaths (MediaFilePaths) + storage_providers ([StorageProvider]): List of StorageProvider that are + used to fetch and store files. """ def __init__(self, local_media_directory, filepaths, storage_providers): @@ -68,9 +74,16 @@ class MediaStorage(object): """Context manager used to get a file like object to write into, as described by file_info. - Actually yields a 3-tuple (file, fname, finish_cb), where finish_cb is a - function that returns a Deferred that must be waited on after the file - has been successfully written to. + Actually yields a 3-tuple (file, fname, finish_cb), where file is a file + like object that can be written to, fname is the absolute path of file + on disk, and finish_cb is a function that returns a Deferred. + + fname can be used to read the contents from after upload, e.g. to + generate thumbnails. + + finish_cb must be called and waited on after the file has been + successfully been written to. 
Should not be called if there was an + error. Args: file_info (FileInfo): Info about the file to store @@ -109,7 +122,7 @@ class MediaStorage(object): raise e if not finished_called: - raise Exception("Fnished callback not called") + raise Exception("Finished callback not called") @defer.inlineCallbacks def fetch_media(self, file_info): @@ -120,7 +133,7 @@ class MediaStorage(object): file_info (FileInfo) Returns: - Deferred(Responder): Returns a Responder if the file was found, + Deferred[Responder|None]: Returns a Responder if the file was found, otherwise None. """ @@ -138,6 +151,15 @@ class MediaStorage(object): def _file_info_to_path(self, file_info): """Converts file_info into a relative path. + + The path is suitable for storing files under a directory, e.g. used to + store files on local FS under the base media repository directory. + + Args: + file_info (FileInfo) + + Returns: + str """ if file_info.url_cache: return self.filepaths.url_cache_filepath_rel(file_info.file_id) From 4d88958cf6f9ed28ecd78990a8f51119eb294279 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:23:54 +0000 Subject: [PATCH 044/348] Make class var local --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 6508dbf178..65b16ce4cb 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -78,7 +78,7 @@ class MediaRepository(object): # List of StorageProviders where we should search for media and # potentially upload to. - self.storage_providers = [] + storage_providers = [] # TODO: Move this into config and allow other storage providers to be # defined. 
@@ -92,10 +92,10 @@ class MediaRepository(object): store_synchronous=hs.config.synchronous_backup_media_store, store_remote=True, ) - self.storage_providers.append(provider) + storage_providers.append(provider) self.media_storage = MediaStorage( - self.primary_base_path, self.filepaths, self.storage_providers, + self.primary_base_path, self.filepaths, storage_providers, ) self.clock.looping_call( From c6c009603cd72c2dfd777658b871dc66b08f7aa4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:24:05 +0000 Subject: [PATCH 045/348] Remove unused variables --- synapse/rest/media/v1/download_resource.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 5dc92972cf..3443db91ca 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -32,11 +32,8 @@ class DownloadResource(Resource): def __init__(self, hs, media_repo): Resource.__init__(self) - self.filepaths = media_repo.filepaths self.media_repo = media_repo self.server_name = hs.hostname - self.store = hs.get_datastore() - self.version_string = hs.version_string self.clock = hs.get_clock() def render_GET(self, request): From 1e4edd1717d1a3ef5ada210882d6798c520626eb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 11:28:32 +0000 Subject: [PATCH 046/348] Remove unnecessary condition --- synapse/rest/media/v1/media_repository.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 65b16ce4cb..5c50646bc7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -263,10 +263,7 @@ class MediaRepository(object): ) responder = yield self.media_storage.fetch_media(file_info) - if responder: - defer.returnValue((responder, media_info)) - - defer.returnValue((None, media_info)) + defer.returnValue((responder, media_info)) @defer.inlineCallbacks def _download_remote_file(self, server_name, media_id, file_id): From dcc8eded4172670137b5dd9be1e4c70328cf85f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:16:27 +0000 Subject: [PATCH 047/348] Add missing class var --- synapse/rest/media/v1/download_resource.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/rest/media/v1/download_resource.py b/synapse/rest/media/v1/download_resource.py index 3443db91ca..fe7e17596f 100644 --- a/synapse/rest/media/v1/download_resource.py +++ b/synapse/rest/media/v1/download_resource.py @@ -34,7 +34,10 @@ class DownloadResource(Resource): self.media_repo = media_repo self.server_name = hs.hostname + + # Both of these are expected by @request_handler() self.clock = hs.get_clock() + self.version_string = hs.version_string def render_GET(self, request): self._async_render_GET(request) From 85a4d78213f6987c920043532bca428bb582a46b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 13:32:03 +0000 Subject: [PATCH 048/348] Make Responder a context manager --- synapse/rest/media/v1/_base.py | 14 ++++++++------ synapse/rest/media/v1/media_storage.py | 5 ++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 03df875b44..1145904aeb 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -144,15 +144,16 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam return 
add_file_headers(request, media_type, file_size, upload_name) - yield responder.write_to_consumer(request) + with responder: + yield responder.write_to_consumer(request) finish_request(request) class Responder(object): """Represents a response that can be streamed to the requester. - Either `write_to_consumer` or `cancel` must be called to clean up any open - resources. + Responder is a context manager which *must* be used, so that any resources + held can be cleaned up. """ def write_to_consumer(self, consumer): """Stream response into consumer @@ -165,9 +166,10 @@ class Responder(object): """ pass - def cancel(self): - """Called when the responder is not going to be used after all. - """ + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): pass diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 49d2b7cd45..b6e7a19e12 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -220,8 +220,7 @@ class FileResponder(Responder): @defer.inlineCallbacks def write_to_consumer(self, consumer): - with self.open_file: - yield FileSender().beginFileTransfer(self.open_file, consumer) + yield FileSender().beginFileTransfer(self.open_file, consumer) - def cancel(self): + def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() From e21370ba54607c9eb78869bc7ce5ab3d6f896fdd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 14:44:02 +0000 Subject: [PATCH 049/348] Correctly reraise exception --- synapse/rest/media/v1/media_storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index b6e7a19e12..001e84578e 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -24,6 +24,7 @@ import contextlib import os import logging import shutil +import sys logger = logging.getLogger(__name__) @@ -114,12 +115,13 @@ class MediaStorage(object): try: with open(fname, "wb") as f: yield f, fname, finish - except Exception as e: + except Exception: + t, v, tb = sys.exc_info() try: os.remove(fname) except Exception: pass - raise e + raise t, v, tb if not finished_called: raise Exception("Finished callback not called") From 694f1c1b185a8431679d39a80b7567ae68605e17 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 15:02:46 +0000 Subject: [PATCH 050/348] Fix up comments --- synapse/rest/media/v1/_base.py | 4 ++-- synapse/rest/media/v1/media_repository.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 1145904aeb..e7ac01da01 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -136,8 +136,8 @@ def respond_with_responder(request, responder, media_type, file_size, upload_nam request (twisted.web.http.Request) responder (Responder|None) media_type (str): The media/content type. - file_size (int): Size in bytes of the media. If not known it should be None - upload_name (str): The name of the requested file, if any. + file_size (int|None): Size in bytes of the media. If not known it should be None + upload_name (str|None): The name of the requested file, if any. 
""" if not responder: respond_404(request) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c50646bc7..45bc534200 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -161,11 +161,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) - media_id (str) + media_id (str): The media ID of the content. (This is the same as + the file_id for local content.) name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -196,11 +197,12 @@ class MediaRepository(object): Args: request(twisted.web.http.Request) server_name (str): Remote server_name where the media originated. - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). name (str|None): Optional name that, if specified, will be used as the filename in the Content-Disposition header of the response. - Retruns: + Returns: Deferred: Resolves once a response has successfully been written to request """ @@ -230,6 +232,11 @@ class MediaRepository(object): """Looks for media in local cache, if not there then attempt to download from remote server. + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + Returns: Deferred[(Responder, media_info)] """ @@ -272,7 +279,9 @@ class MediaRepository(object): Args: server_name (str): Originating server - media_id (str) + media_id (str): The media ID of the content (as defined by the + remote server). This is different than the file_id, which is + locally generated. file_id (str): Local file ID Returns: From 21bf87a146e54d9c111abb6f39a1bcbdc0563df2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 15:38:06 +0000 Subject: [PATCH 051/348] Reinstate media download on thumbnail request We need to actually download the remote media when we get a request for a thumbnail. --- synapse/rest/media/v1/thumbnail_resource.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70dbf7f5c9..53e48aba22 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -198,6 +198,11 @@ class ThumbnailResource(Resource): @defer.inlineCallbacks def _respond_remote_thumbnail(self, request, server_name, media_id, width, height, method, m_type): + # TODO: Don't download the whole remote file + # We should proxy the thumbnail from the remote server instead of + # downloading the remote file and generating our own thumbnails. + yield self.media_repo.get_remote_media(server_name, media_id) + thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, ) From 19d274085fa939c440667759d38a8a255216899b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 23:21:32 +0000 Subject: [PATCH 052/348] Make Counter render floats Prometheus handles all metrics as floats, and sometimes we store non-integer values in them (notably, durations in seconds), so let's render them as floats too. (Note that the standard client libraries also treat Counters as floats.) 
--- synapse/metrics/metric.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index e87b2b80a7..1d054dd557 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -50,7 +50,14 @@ class BaseMetric(object): class CounterMetric(BaseMetric): """The simplest kind of metric; one that stores a monotonically-increasing - integer that counts events.""" + value that counts events or running totals. + + Example use cases for Counters: + - Number of requests processed + - Number of items that were inserted into a queue + - Total amount of data that a system has processed + Counters can only go up (and be reset when the process restarts). + """ def __init__(self, *args, **kwargs): super(CounterMetric, self).__init__(*args, **kwargs) @@ -59,7 +66,7 @@ class CounterMetric(BaseMetric): # Scalar metrics are never empty if self.is_scalar(): - self.counts[()] = 0 + self.counts[()] = 0. def inc_by(self, incr, *values): if len(values) != self.dimension(): @@ -78,7 +85,7 @@ class CounterMetric(BaseMetric): self.inc_by(1, *values) def render_item(self, k): - return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])] + return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])] def render(self): return map_concat(self.render_item, sorted(self.counts.keys())) From 80fa610f9c8702d6b7256be9d97668de29ba2e06 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:52:52 +0000 Subject: [PATCH 053/348] Add some comments to metrics classes --- synapse/metrics/metric.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1d054dd557..c5f0bcbc15 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -24,8 +24,16 @@ def map_concat(func, items): class BaseMetric(object): + """Base class for metrics which report a single value per label set + """ def __init__(self, name, labels=[]): + """ + Args: + name (str): principal name for this metric + labels (list(str)): names of the labels which will be reported + for this metric + """ self.name = name self.labels = labels # OK not to clone as we never write it @@ -36,7 +44,7 @@ class BaseMetric(object): return not len(self.labels) def _render_labelvalue(self, value): - # TODO: some kind of value escape + # TODO: escape backslashes, quotes and newlines return '"%s"' % (value) def _render_key(self, values): @@ -47,6 +55,20 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def render(self): + """Render this metric + + Each metric is rendered as: + + name{label1="val1",label2="val2"} value + + https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details + + Returns: + iterable[str]: rendered metrics + """ + raise NotImplementedError() + class CounterMetric(BaseMetric): """The simplest kind of metric; one that stores a monotonically-increasing @@ -62,6 +84,10 @@ class CounterMetric(BaseMetric): def __init__(self, *args, **kwargs): super(CounterMetric, self).__init__(*args, **kwargs) + # dict[list[str]]: value for each set of label values. the keys are the + # label values, in the same order as the labels in self.labels. + # + # (if the metric is a scalar, the (single) key is the empty list). 
self.counts = {} # Scalar metrics are never empty From 992018d1c07a727e54c4ad5b4079f5f5de8fec5d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 16:58:41 +0000 Subject: [PATCH 054/348] mechanism to render metrics with alternative names --- synapse/metrics/metric.py | 53 +++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index c5f0bcbc15..f480aae614 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -17,24 +17,33 @@ from itertools import chain -# TODO(paul): I can't believe Python doesn't have one of these -def map_concat(func, items): - # flatten a list-of-lists - return list(chain.from_iterable(map(func, items))) +def flatten(items): + """Flatten a list of lists + + Args: + items: iterable[iterable[X]] + + Returns: + list[X]: flattened list + """ + return list(chain.from_iterable(items)) class BaseMetric(object): """Base class for metrics which report a single value per label set """ - def __init__(self, name, labels=[]): + def __init__(self, name, labels=[], alternative_names=[]): """ Args: name (str): principal name for this metric labels (list(str)): names of the labels which will be reported for this metric + alternative_names (iterable(str)): list of alternative names for + this metric. This can be useful to provide a migration path + when renaming metrics. """ - self.name = name + self._names = [name] + list(alternative_names) self.labels = labels # OK not to clone as we never write it def dimension(self): @@ -55,6 +64,22 @@ class BaseMetric(object): for k, v in zip(self.labels, values)]) ) + def _render_for_labels(self, label_values, value): + """Render this metric for a single set of labels + + Args: + label_values (list[str]): values for each of the labels + value: value of the metric at with these labels + + Returns: + iterable[str]: rendered metric + """ + rendered_labels = self._render_key(label_values) + return ( + "%s%s %.12g" % (name, rendered_labels, value) + for name in self._names + ) + def render(self): """Render this metric @@ -110,11 +135,11 @@ class CounterMetric(BaseMetric): def inc(self, *values): self.inc_by(1, *values) - def render_item(self, k): - return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])] - def render(self): - return map_concat(self.render_item, sorted(self.counts.keys())) + return flatten( + self._render_for_labels(k, self.counts[k]) + for k in sorted(self.counts.keys()) + ) class CallbackMetric(BaseMetric): @@ -131,10 +156,12 @@ class CallbackMetric(BaseMetric): value = self.callback() if self.is_scalar(): - return ["%s %.12g" % (self.name, value)] + return list(self._render_for_labels([], value)) - return ["%s%s %.12g" % (self.name, self._render_key(k), value[k]) - for k in sorted(value.keys())] + return flatten( + self._render_for_labels(k, value[k]) + for k in sorted(value.keys()) + ) class DistributionMetric(object): From 39f4e29d0151b56a3c8528e3149cd5765b9f600d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 17:00:12 +0000 Subject: [PATCH 055/348] Reorganise request and block metrics In order to circumvent the number of duplicate foo:count metrics increasing without bounds, it's time for a rearrangement. 
The following are all deprecated, and replaced with synapse_util_metrics_block_count: synapse_util_metrics_block_timer:count synapse_util_metrics_block_ru_utime:count synapse_util_metrics_block_ru_stime:count synapse_util_metrics_block_db_txn_count:count synapse_util_metrics_block_db_txn_duration:count The following are all deprecated, and replaced with synapse_http_server_response_count: synapse_http_server_requests synapse_http_server_response_time:count synapse_http_server_response_ru_utime:count synapse_http_server_response_ru_stime:count synapse_http_server_response_db_txn_count:count synapse_http_server_response_db_txn_duration:count The following are renamed (the old metrics are kept for now, but deprecated): synapse_util_metrics_block_timer:total -> synapse_util_metrics_block_time_seconds synapse_util_metrics_block_ru_utime:total -> synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_stime:total -> synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_db_txn_count:total -> synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_duration:total -> synapse_util_metrics_block_db_txn_duration_seconds synapse_http_server_response_time:total -> synapse_http_server_response_time_seconds synapse_http_server_response_ru_utime:total -> synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_stime:total -> synapse_http_server_response_ru_stime_seconds synapse_http_server_response_db_txn_count:total -> synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_duration:total synapse_http_server_response_db_txn_duration_seconds --- synapse/http/server.py | 56 ++++++++++++++++++++++++++++++----------- synapse/util/metrics.py | 53 ++++++++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 25 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6e8f4c9c5f..269b65ca41 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -42,34 +42,62 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter( - "requests", +# total number of responses served, split by method/servlet/tag +response_count = metrics.register_counter( + "response_count", labels=["method", "servlet", "tag"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_requests", + "_response_time:count", + "_response_ru_utime:count", + "_response_ru_stime:count", + "_response_db_txn_count:count", + "_response_db_txn_duration:count", + ) + ) ) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], ) -response_timer = metrics.register_distribution( - "response_time", - labels=["method", "servlet", "tag"] +response_timer = metrics.register_counter( + "response_time_seconds", + labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_time:total", + ), ) -response_ru_utime = metrics.register_distribution( - "response_ru_utime", labels=["method", "servlet", "tag"] +response_ru_utime = metrics.register_counter( + "response_ru_utime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_utime:total", + ), ) -response_ru_stime = metrics.register_distribution( - "response_ru_stime", labels=["method", "servlet", "tag"] +response_ru_stime = metrics.register_counter( + "response_ru_stime_seconds", labels=["method", 
"servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_stime:total", + ), ) -response_db_txn_count = metrics.register_distribution( - "response_db_txn_count", labels=["method", "servlet", "tag"] +response_db_txn_count = metrics.register_counter( + "response_db_txn_count", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_count:total", + ), ) -response_db_txn_duration = metrics.register_distribution( - "response_db_txn_duration", labels=["method", "servlet", "tag"] +response_db_txn_duration = metrics.register_counter( + "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_duration:total", + ), ) @@ -330,7 +358,7 @@ class RequestMetrics(object): ) return - incoming_requests_counter.inc(request.method, self.name, tag) + response_count.inc(request.method, self.name, tag) response_timer.inc_by( clock.time_msec() - self.start, request.method, diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 4ea930d3e8..8d22ff3068 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -27,25 +27,56 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -block_timer = metrics.register_distribution( - "block_timer", - labels=["block_name"] +# total number of times we have hit this block +response_count = metrics.register_counter( + "block_count", + labels=["block_name"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_block_timer:count", + "_block_ru_utime:count", + "_block_ru_stime:count", + "_block_db_txn_count:count", + "_block_db_txn_duration:count", + ) + ) ) -block_ru_utime = metrics.register_distribution( - "block_ru_utime", labels=["block_name"] +block_timer = metrics.register_counter( + "block_time_seconds", + labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_timer:total", + ), ) -block_ru_stime = metrics.register_distribution( - "block_ru_stime", labels=["block_name"] +block_ru_utime = metrics.register_counter( + "block_ru_utime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_utime:total", + ), ) -block_db_txn_count = metrics.register_distribution( - "block_db_txn_count", labels=["block_name"] +block_ru_stime = metrics.register_counter( + "block_ru_stime_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_ru_stime:total", + ), ) -block_db_txn_duration = metrics.register_distribution( - "block_db_txn_duration", labels=["block_name"] +block_db_txn_count = metrics.register_counter( + "block_db_txn_count", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), +) + +block_db_txn_duration = metrics.register_counter( + "block_db_txn_duration_seconds", labels=["block_name"], + alternative_names=( + metrics.name_prefix + "_block_db_txn_count:total", + ), ) From 5c3c32f16f99b11b91b34c0829db98896373ea75 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 17:45:55 +0000 Subject: [PATCH 056/348] Metrics for number of RDATA commands received I found myself wishing we had this. 
--- synapse/replication/tcp/protocol.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index d59503b905..0a9a290af4 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -517,25 +517,28 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.send_error("Wrong remote") def on_RDATA(self, cmd): + stream_name = cmd.stream_name + inbound_rdata_count.inc(stream_name) + try: - row = STREAMS_MAP[cmd.stream_name].ROW_TYPE(*cmd.row) + row = STREAMS_MAP[stream_name].ROW_TYPE(*cmd.row) except Exception: logger.exception( "[%s] Failed to parse RDATA: %r %r", - self.id(), cmd.stream_name, cmd.row + self.id(), stream_name, cmd.row ) raise if cmd.token is None: # I.e. this is part of a batch of updates for this stream. Batch # until we get an update for the stream with a non None token - self.pending_batches.setdefault(cmd.stream_name, []).append(row) + self.pending_batches.setdefault(stream_name, []).append(row) else: # Check if this is the last of a batch of updates - rows = self.pending_batches.pop(cmd.stream_name, []) + rows = self.pending_batches.pop(stream_name, []) rows.append(row) - self.handler.on_rdata(cmd.stream_name, cmd.token, rows) + self.handler.on_rdata(stream_name, cmd.token, rows) def on_POSITION(self, cmd): self.handler.on_position(cmd.stream_name, cmd.token) @@ -644,3 +647,9 @@ metrics.register_callback( }, labels=["command", "name", "conn_id"], ) + +# number of updates received for each RDATA stream +inbound_rdata_count = metrics.register_counter( + "inbound_rdata_count", + labels=["stream_name"], +) From a027c2af8d348554cad4855094a6f46ef21bfad7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 15 Jan 2018 18:20:30 +0000 Subject: [PATCH 057/348] Metrics for events processed in appservice and fed sender More metrics I wished I'd had --- synapse/federation/transaction_queue.py | 4 ++++ synapse/handlers/appservice.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 3e7809b04f..9d39f46583 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -42,6 +42,8 @@ sent_edus_counter = client_metrics.register_counter("sent_edus") sent_transactions_counter = client_metrics.register_counter("sent_transactions") +events_processed_counter = client_metrics.register_counter("events_processed") + class TransactionQueue(object): """This class makes sure we only have one transaction in flight at @@ -205,6 +207,8 @@ class TransactionQueue(object): self._send_pdu(event, destinations) + events_processed_counter.inc_by(len(events)) + yield self.store.update_federation_out_pos( "events", next_token ) diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index feca3e4c10..3dd3fa2a27 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -15,6 +15,7 @@ from twisted.internet import defer +import synapse from synapse.api.constants import EventTypes from synapse.util.metrics import Measure from synapse.util.logcontext import make_deferred_yieldable, preserve_fn @@ -23,6 +24,10 @@ import logging logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +events_processed_counter = metrics.register_counter("events_processed") + def log_failure(failure): logger.error( @@ -103,6 +108,8 @@ class 
ApplicationServicesHandler(object): service, event ) + events_processed_counter.inc_by(len(events)) + yield self.store.set_appservice_last_pos(upper_bound) finally: self.is_processing = False From a4c5e4a6451dfc7b378c10b916635de6bdafa80a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 11:06:42 +0000 Subject: [PATCH 058/348] Fix thumbnailing remote files --- synapse/rest/media/v1/media_repository.py | 28 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 4 +-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 45bc534200..2608fab5de 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -227,6 +227,34 @@ class MediaRepository(object): else: respond_404(request) + @defer.inlineCallbacks + def get_remote_media_info(self, server_name, media_id): + """Gets the media info associated with the remote file, downloading + if necessary. + + Args: + server_name (str): Remote server_name where the media originated. + media_id (str): The media ID of the content (as defined by the + remote server). + + Returns: + Deferred[dict]: The media_info of the file + """ + # We linearize here to ensure that we don't try and download remote + # media multiple times concurrently + key = (server_name, media_id) + with (yield self.remote_media_linearizer.queue(key)): + responder, media_info = yield self._get_remote_media_impl( + server_name, media_id, + ) + + # Ensure we actually use the responder so that it releases resources + if responder: + with responder: + pass + + defer.returnValue(media_info) + @defer.inlineCallbacks def _get_remote_media_impl(self, server_name, media_id): """Looks for media in local cache, if not there then attempt to diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 835540c3dd..70cea7782e 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -165,7 +165,7 @@ class ThumbnailResource(Resource): def _select_or_generate_remote_thumbnail(self, request, server_name, media_id, desired_width, desired_height, desired_method, desired_type): - media_info = yield self.media_repo.get_remote_media(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -216,7 +216,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. - yield self.media_repo.get_remote_media(server_name, media_id) + yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, From 64ddec1bc0a1d23a285d560e34986441b3f8c854 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 11:47:36 +0000 Subject: [PATCH 059/348] Fix a logcontext leak in persist_events ObserveableDeferred expects its callbacks to be called without any logcontexts, whereas it turns out we were calling them with the logcontext of the request which initiated the persistence loop. It seems wrong that we are attributing work done in the persistence loop to the request that happened to initiate it, so let's solve this by dropping the logcontext for it. 
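For readers unfamiliar with the logcontext machinery, a minimal sketch of the two fire-and-forget patterns being swapped below, using the synapse.util.logcontext helpers that appear elsewhere in this series (handle_queue_loop stands in for the persistence loop):

    from synapse.util.logcontext import PreserveLoggingContext, preserve_fn

    # before: the loop runs in (and its work is attributed to) the logcontext
    # of whichever request happened to kick it off
    preserve_fn(handle_queue_loop)()

    # after: drop to the sentinel logcontext first, so the loop's work is not
    # attributed to that request
    with PreserveLoggingContext():
        handle_queue_loop()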
(I'm not sure this actually causes any real problems other than messages in the debug log, but let's clean it up anyway) --- synapse/storage/events.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d7..ad1d782705 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -146,6 +146,9 @@ class _EventPeristenceQueue(object): try: queue = self._get_drainining_queue(room_id) for item in queue: + # handle_queue_loop runs in the sentinel logcontext, so + # there is no need to preserve_fn when running the + # callbacks on the deferred. try: ret = yield per_item_callback(item) item.deferred.callback(ret) @@ -157,7 +160,11 @@ class _EventPeristenceQueue(object): self._event_persist_queues[room_id] = queue self._currently_persisting_rooms.discard(room_id) - preserve_fn(handle_queue_loop)() + # set handle_queue_loop off on the background. We don't want to + # attribute work done in it to the current request, so we drop the + # logcontext altogether. + with PreserveLoggingContext(): + handle_queue_loop() def _get_drainining_queue(self, room_id): queue = self._event_persist_queues.setdefault(room_id, deque()) From c5b589f2e8205ca0253534cf5826b807253bb8ea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:01:40 +0000 Subject: [PATCH 060/348] Log when we respond with 404 --- synapse/rest/media/v1/media_repository.py | 1 + synapse/rest/media/v1/thumbnail_resource.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 2608fab5de..b12fabd943 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,6 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: + logger.info("Media is quarentined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 70cea7782e..b8c38eb319 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,6 +85,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -111,6 +112,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Couldn't find any generated thumbnails") respond_404(request) @defer.inlineCallbacks @@ -120,6 +122,7 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -159,6 +162,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate local thumbnail") respond_404(request) @defer.inlineCallbacks @@ -197,7 +201,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a remote thumbnail of that size. 
Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -208,6 +212,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: + logger.warn("Failed to generate remote thumbnail") respond_404(request) @defer.inlineCallbacks @@ -241,6 +246,7 @@ class ThumbnailResource(Resource): responder = yield self.media_storage.fetch_media(file_info) yield respond_with_responder(request, responder, t_type, t_length) else: + logger.info("Failed to find any generated thumbnails") respond_404(request) def _select_thumbnail(self, desired_width, desired_height, desired_method, From 9795b9ebb11953747e6362fe123725326885667b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 12:02:06 +0000 Subject: [PATCH 061/348] Correctly use server_name/file_id when generating/fetching remote thumbnails --- synapse/rest/media/v1/media_repository.py | 7 +++++-- synapse/rest/media/v1/thumbnail_resource.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index b12fabd943..e82dcfdc2c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -539,7 +539,10 @@ class MediaRepository(object): Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - file_id = media_info.get("filesystem_id") + if server_name: + file_id = media_info["filesystem_id"] + else: + file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return @@ -597,7 +600,7 @@ class MediaRepository(object): try: file_info = FileInfo( server_name=server_name, - file_id=media_id, + file_id=file_id, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index b8c38eb319..e20d6f10b0 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -185,7 +185,7 @@ class ThumbnailResource(Resource): if t_w and t_h and t_method and t_type: file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=info["thumbnail_width"], thumbnail_height=info["thumbnail_height"], @@ -221,7 +221,7 @@ class ThumbnailResource(Resource): # TODO: Don't download the whole remote file # We should proxy the thumbnail from the remote server instead of # downloading the remote file and generating our own thumbnails. 
- yield self.media_repo.get_remote_media_info(server_name, media_id) + media_info = yield self.media_repo.get_remote_media_info(server_name, media_id) thumbnail_infos = yield self.store.get_remote_media_thumbnails( server_name, media_id, @@ -232,7 +232,7 @@ class ThumbnailResource(Resource): width, height, method, m_type, thumbnail_infos ) file_info = FileInfo( - server_name=None, file_id=media_id, + server_name=server_name, file_id=media_info["filesystem_id"], thumbnail=True, thumbnail_width=thumbnail_info["thumbnail_width"], thumbnail_height=thumbnail_info["thumbnail_height"], From ee7a1cabd8c6d218b838295fde6999dcbc23036b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 13:04:01 +0000 Subject: [PATCH 062/348] document metrics changes --- CHANGES.rst | 5 +++- docs/metrics-howto.rst | 61 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 24e4e7a384..a7ed49e105 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,7 +2,10 @@ Unreleased ========== synctl no longer starts the main synapse when using ``-a`` option with workers. -A new worker file should be added with ``worker_app: synapse.app.homeserver`` +A new worker file should be added with ``worker_app: synapse.app.homeserver``. + +This release also begins the process of renaming a number of the metrics +reported to prometheus. See `docs/metrics-howto.rst `_. Changes in synapse v0.26.0 (2018-01-05) diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst index 143cd0f42f..8acc479bc3 100644 --- a/docs/metrics-howto.rst +++ b/docs/metrics-howto.rst @@ -16,7 +16,7 @@ How to monitor Synapse metrics using Prometheus metrics_port: 9092 Also ensure that ``enable_metrics`` is set to ``True``. - + Restart synapse. 3. Add a prometheus target for synapse. @@ -28,11 +28,58 @@ How to monitor Synapse metrics using Prometheus static_configs: - targets: ["my.server.here:9092"] - If your prometheus is older than 1.5.2, you will need to replace + If your prometheus is older than 1.5.2, you will need to replace ``static_configs`` in the above with ``target_groups``. - + Restart prometheus. + +Block and response metrics renamed for 0.27.0 +--------------------------------------------- + +Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count`` +metrics reported for the resource tracking for code blocks and HTTP requests. + +At the same time, the corresponding ``*:total`` metrics are being renamed, as +the ``:total`` suffix no longer makes sense in the absence of a corresponding +``:count`` metric. + +To enable a graceful migration path, this release just adds new names for the +metrics being renamed. A future release will remove the old ones. + +The following table shows the new metrics, and the old metrics which they are +replacing. 
+ +==================================================== =================================================== +New name Old name +==================================================== =================================================== +synapse_util_metrics_block_count synapse_util_metrics_block_timer:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count +synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count +synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count + +synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total +synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total +synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total +synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total +synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total + +synapse_http_server_response_count synapse_http_server_requests +synapse_http_server_response_count synapse_http_server_response_time:count +synapse_http_server_response_count synapse_http_server_response_ru_utime:count +synapse_http_server_response_count synapse_http_server_response_ru_stime:count +synapse_http_server_response_count synapse_http_server_response_db_txn_count:count +synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count + +synapse_http_server_response_time_seconds synapse_http_server_response_time:total +synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total +synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total +synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total +synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total +==================================================== =================================================== + + Standard Metric Names --------------------- @@ -42,7 +89,7 @@ have been changed to seconds, from miliseconds. ================================== ============================= New name Old name ----------------------------------- ----------------------------- +================================== ============================= process_cpu_user_seconds_total process_resource_utime / 1000 process_cpu_system_seconds_total process_resource_stime / 1000 process_open_fds (no 'type' label) process_fds @@ -52,8 +99,8 @@ The python-specific counts of garbage collector performance have been renamed. =========================== ====================== New name Old name ---------------------------- ---------------------- -python_gc_time reactor_gc_time +=========================== ====================== +python_gc_time reactor_gc_time python_gc_unreachable_total reactor_gc_unreachable python_gc_counts reactor_gc_counts =========================== ====================== @@ -62,7 +109,7 @@ The twisted-specific reactor metrics have been renamed. 
==================================== ===================== New name Old name ------------------------------------- --------------------- +==================================== ===================== python_twisted_reactor_pending_calls reactor_pending_calls python_twisted_reactor_tick_time reactor_tick_time ==================================== ===================== From 307f88dfb6c157ee2bda6f9b8b3dee82cad490aa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 13:53:43 +0000 Subject: [PATCH 063/348] Fix up log lines --- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/thumbnail_resource.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e82dcfdc2c..d191f9d2f5 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -285,7 +285,7 @@ class MediaRepository(object): # If we have an entry in the DB, try and look for it if media_info: if media_info["quarantined_by"]: - logger.info("Media is quarentined") + logger.info("Media is quarantined") raise NotFoundError() responder = yield self.media_storage.fetch_media(file_info) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index e20d6f10b0..1451f6f106 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -85,7 +85,9 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info: + logger.info("Media is quarantined") + respond_404(request) return @@ -122,7 +124,8 @@ class ThumbnailResource(Resource): media_info = yield self.store.get_local_media(media_id) if not media_info or media_info["quarantined_by"]: - logger.info("Media is quarantined") + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -152,7 +155,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a local thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( @@ -162,7 +165,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate local thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks @@ -201,7 +204,7 @@ class ThumbnailResource(Resource): yield respond_with_responder(request, responder, t_type, t_length) return - logger.debug("We don't have a remote thumbnail of that size. Generating") + logger.debug("We don't have a thumbnail of that size. Generating") # Okay, so we generate one. 
file_path = yield self.media_repo.generate_remote_exact_thumbnail( @@ -212,7 +215,7 @@ class ThumbnailResource(Resource): if file_path: yield respond_with_file(request, desired_type, file_path) else: - logger.warn("Failed to generate remote thumbnail") + logger.warn("Failed to generate thumbnail") respond_404(request) @defer.inlineCallbacks From 5dfc83704b9f338c975a03bad7854218658b3a80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 14:32:56 +0000 Subject: [PATCH 064/348] Fix typo --- synapse/rest/media/v1/thumbnail_resource.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 1451f6f106..8c9653843b 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -84,10 +84,11 @@ class ThumbnailResource(Resource): method, m_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info: - logger.info("Media is quarantined") - + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return @@ -123,9 +124,11 @@ class ThumbnailResource(Resource): desired_type): media_info = yield self.store.get_local_media(media_id) - if not media_info or media_info["quarantined_by"]: - if media_info["quarantined_by"]: - logger.info("Media is quarantined") + if not media_info: + respond_404(request) + return + if media_info["quarantined_by"]: + logger.info("Media is quarantined") respond_404(request) return From 44a498418c62a835aae9bff8550f844888b3ab84 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 Jan 2018 22:40:51 +0000 Subject: [PATCH 065/348] Optimise LoggingContext creation and copying It turns out that the only thing we use the __dict__ of LoggingContext for is `request`, and given we create lots of LoggingContexts and then copy them every time we do a db transaction or log line, using the __dict__ seems a bit redundant. Let's try to optimise things by making the request attribute explicit. --- synapse/util/logcontext.py | 25 ++++++++++++++++++------- tests/crypto/test_keyring.py | 14 +++++++------- tests/util/test_logcontext.py | 16 ++++++++-------- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 48c9f6802d..ca71a1fc27 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -52,13 +52,16 @@ except Exception: class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. + Args: name (str): Name for the context for debugging. """ __slots__ = [ - "previous_context", "name", "usage_start", "usage_end", "main_thread", - "__dict__", "tag", "alive", + "previous_context", "name", "ru_stime", "ru_utime", + "db_txn_count", "db_txn_duration", "usage_start", "usage_end", + "main_thread", "alive", + "request", "tag", ] thread_local = threading.local() @@ -96,7 +99,9 @@ class LoggingContext(object): self.db_txn_count = 0 self.db_txn_duration = 0. 
self.usage_start = None + self.usage_end = None self.main_thread = threading.current_thread() + self.request = None self.tag = "" self.alive = True @@ -105,7 +110,11 @@ class LoggingContext(object): @classmethod def current_context(cls): - """Get the current logging context from thread local storage""" + """Get the current logging context from thread local storage + + Returns: + LoggingContext: the current logging context + """ return getattr(cls.thread_local, "current_context", cls.sentinel) @classmethod @@ -155,11 +164,13 @@ class LoggingContext(object): self.alive = False def copy_to(self, record): - """Copy fields from this context to the record""" - for key, value in self.__dict__.items(): - setattr(record, key, value) + """Copy logging fields from this context to a log record or + another LoggingContext + """ - record.ru_utime, record.ru_stime = self.get_resource_usage() + # 'request' is the only field we currently use in the logger, so that's + # all we need to copy + record.request = self.request def start(self): if threading.current_thread() is not self.main_thread: diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 570312da84..c899fecf5d 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -68,7 +68,7 @@ class KeyringTestCase(unittest.TestCase): def check_context(self, _, expected): self.assertEquals( - getattr(LoggingContext.current_context(), "test_key", None), + getattr(LoggingContext.current_context(), "request", None), expected ) @@ -82,7 +82,7 @@ class KeyringTestCase(unittest.TestCase): lookup_2_deferred = defer.Deferred() with LoggingContext("one") as context_one: - context_one.test_key = "one" + context_one.request = "one" wait_1_deferred = kr.wait_for_previous_lookups( ["server1"], @@ -96,7 +96,7 @@ class KeyringTestCase(unittest.TestCase): wait_1_deferred.addBoth(self.check_context, "one") with LoggingContext("two") as context_two: - context_two.test_key = "two" + context_two.request = "two" # set off another wait. It should block because the first lookup # hasn't yet completed. 
@@ -137,7 +137,7 @@ class KeyringTestCase(unittest.TestCase): @defer.inlineCallbacks def get_perspectives(**kwargs): self.assertEquals( - LoggingContext.current_context().test_key, "11", + LoggingContext.current_context().request, "11", ) with logcontext.PreserveLoggingContext(): yield persp_deferred @@ -145,7 +145,7 @@ class KeyringTestCase(unittest.TestCase): self.http_client.post_json.side_effect = get_perspectives with LoggingContext("11") as context_11: - context_11.test_key = "11" + context_11.request = "11" # start off a first set of lookups res_deferreds = kr.verify_json_objects_for_server( @@ -173,7 +173,7 @@ class KeyringTestCase(unittest.TestCase): self.assertIs(LoggingContext.current_context(), context_11) context_12 = LoggingContext("12") - context_12.test_key = "12" + context_12.request = "12" with logcontext.PreserveLoggingContext(context_12): # a second request for a server with outstanding requests # should block rather than start a second call @@ -211,7 +211,7 @@ class KeyringTestCase(unittest.TestCase): sentinel_context = LoggingContext.current_context() with LoggingContext("one") as context_one: - context_one.test_key = "one" + context_one.request = "one" defer = kr.verify_json_for_server("server9", {}) try: diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index e2f7765f49..4850722bc5 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -12,12 +12,12 @@ class LoggingContextTestCase(unittest.TestCase): def _check_test_key(self, value): self.assertEquals( - LoggingContext.current_context().test_key, value + LoggingContext.current_context().request, value ) def test_with_context(self): with LoggingContext() as context_one: - context_one.test_key = "test" + context_one.request = "test" self._check_test_key("test") @defer.inlineCallbacks @@ -25,14 +25,14 @@ class LoggingContextTestCase(unittest.TestCase): @defer.inlineCallbacks def competing_callback(): with LoggingContext() as competing_context: - competing_context.test_key = "competing" + competing_context.request = "competing" yield sleep(0) self._check_test_key("competing") reactor.callLater(0, competing_callback) with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" yield sleep(0) self._check_test_key("one") @@ -43,14 +43,14 @@ class LoggingContextTestCase(unittest.TestCase): @defer.inlineCallbacks def cb(): - context_one.test_key = "one" + context_one.request = "one" yield function() self._check_test_key("one") callback_completed[0] = True with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" # fire off function, but don't wait on it. logcontext.preserve_fn(cb)() @@ -107,7 +107,7 @@ class LoggingContextTestCase(unittest.TestCase): sentinel_context = LoggingContext.current_context() with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" d1 = logcontext.make_deferred_yieldable(blocking_function()) # make sure that the context was reset by make_deferred_yieldable @@ -124,7 +124,7 @@ class LoggingContextTestCase(unittest.TestCase): argument isn't actually a deferred""" with LoggingContext() as context_one: - context_one.test_key = "one" + context_one.request = "one" d1 = logcontext.make_deferred_yieldable("bum") self._check_test_key("one") From 6324b65f08b3f8dbfee6fef0079e2a87cb1c2c85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 Jan 2018 18:17:54 +0000 Subject: [PATCH 066/348] Track db txn time in millisecs ... 
to reduce the amount of floating-point foo we do. --- synapse/http/server.py | 4 +++- synapse/http/site.py | 6 +++--- synapse/util/logcontext.py | 9 ++++++--- synapse/util/metrics.py | 8 +++++--- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 269b65ca41..0f30e6fd56 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -93,6 +93,8 @@ response_db_txn_count = metrics.register_counter( ), ) +# seconds spent waiting for db txns, excluding scheduling time, when processing +# this request response_db_txn_duration = metrics.register_counter( "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], alternative_names=( @@ -377,7 +379,7 @@ class RequestMetrics(object): context.db_txn_count, request.method, self.name, tag ) response_db_txn_duration.inc_by( - context.db_txn_duration, request.method, self.name, tag + context.db_txn_duration_ms / 1000., request.method, self.name, tag ) diff --git a/synapse/http/site.py b/synapse/http/site.py index cd1492b1c3..dc64f0f6f5 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -66,10 +66,10 @@ class SynapseRequest(Request): context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count - db_txn_duration = context.db_txn_duration + db_txn_duration_ms = context.db_txn_duration_ms except Exception: ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration = (0, 0) + db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" @@ -81,7 +81,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), - int(db_txn_duration * 1000), + db_txn_duration_ms, int(db_txn_count), self.sentLength, self.code, diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index ca71a1fc27..a78e53812f 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,7 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -97,7 +97,10 @@ class LoggingContext(object): self.ru_stime = 0. self.ru_utime = 0. self.db_txn_count = 0 - self.db_txn_duration = 0. + + # ms spent waiting for db txns, excluding scheduling time + self.db_txn_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = threading.current_thread() @@ -205,7 +208,7 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): self.db_txn_count += 1 - self.db_txn_duration += duration_ms / 1000. 
+ self.db_txn_duration_ms += duration_ms class LoggingContextFilter(logging.Filter): diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 8d22ff3068..d25629cc50 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -72,6 +72,7 @@ block_db_txn_count = metrics.register_counter( ), ) +# seconds spent waiting for db txns, excluding scheduling time, in this block block_db_txn_duration = metrics.register_counter( "block_db_txn_duration_seconds", labels=["block_name"], alternative_names=( @@ -95,7 +96,7 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration", "created_context" + "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" ] def __init__(self, clock, name): @@ -115,7 +116,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count - self.db_txn_duration = self.start_context.db_txn_duration + self.db_txn_duration_ms = self.start_context.db_txn_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -145,7 +146,8 @@ class Measure(object): context.db_txn_count - self.db_txn_count, self.name ) block_db_txn_duration.inc_by( - context.db_txn_duration - self.db_txn_duration, self.name + (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., + self.name ) if self.created_context: From 0a90d9ede4a39d720afd131866f98b51aa591cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:03:05 +0000 Subject: [PATCH 067/348] Move setting of file_id up to caller --- synapse/rest/media/v1/media_repository.py | 21 +++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d191f9d2f5..35c1dcbc97 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +413,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, media_info + server_name, media_id, file_id, media_info, ) defer.returnValue(media_info) @@ -525,24 +525,23 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + url_cache=False): """Generate and store thumbnails for an image. Args: - server_name(str|None): The server name if remote media, else None if local - media_id(str) - media_info(dict) - url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + server_name (str|None): The server name if remote media, else None if local + media_id (str): The media ID of the content. 
(This is the same as + the file_id for local content) + file_id (str): Local file ID + media_info (dict) + url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ media_type = media_info["media_type"] - if server_name: - file_id = media_info["filesystem_id"] - else: - file_id = media_id requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f3dbbb3fec..5ddf581bd2 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -185,8 +185,9 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): + file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, media_info['filesystem_id'], media_info, url_cache=True, + None, file_id, file_id, media_info, url_cache=True, ) og = { @@ -231,8 +232,9 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images + file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, image_info['filesystem_id'], image_info, url_cache=True, + None, file_id, file_id, image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From 6368e5c0ab39e43ee6950ba94a826c44db7d43f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:17:38 +0000 Subject: [PATCH 068/348] Change _generate_thumbnails to take media_type --- synapse/rest/media/v1/media_repository.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 35c1dcbc97..e08cf8af46 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -151,7 +151,9 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_thumbnails(None, media_id, media_id, media_info) + yield self._generate_thumbnails( + None, media_id, media_id, media_info["media_type"], + ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -413,7 +415,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info, + server_name, media_id, file_id, media_info["media_type"], ) defer.returnValue(media_info) @@ -525,7 +527,7 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_thumbnails(self, server_name, media_id, file_id, media_info, + def _generate_thumbnails(self, server_name, media_id, file_id, media_type, url_cache=False): """Generate and store thumbnails for an image. @@ -534,14 +536,13 @@ class MediaRepository(object): media_id (str): The media ID of the content. 
(This is the same as the file_id for local content) file_id (str): Local file ID - media_info (dict) + media_type (str) url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer Returns: Deferred[dict]: Dict with "width" and "height" keys of original image """ - media_type = media_info["media_type"] requirements = self._get_thumbnail_requirements(media_type) if not requirements: return diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 5ddf581bd2..981f01e417 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -187,7 +187,8 @@ class PreviewUrlResource(Resource): if _is_media(media_info['media_type']): file_id = media_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, media_info, url_cache=True, + None, file_id, file_id, media_info["media_type"], + url_cache=True, ) og = { @@ -234,7 +235,8 @@ class PreviewUrlResource(Resource): # TODO: make sure we don't choke on white-on-transparent images file_id = image_info['filesystem_id'] dims = yield self.media_repo._generate_thumbnails( - None, file_id, file_id, image_info, url_cache=True, + None, file_id, file_id, image_info["media_type"], + url_cache=True, ) if dims: og["og:image:width"] = dims['width'] From d863f68cab9665335b1657feeab5d00724dddd95 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 16:24:15 +0000 Subject: [PATCH 069/348] Use local vars --- synapse/rest/media/v1/media_repository.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e08cf8af46..22f86781f7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -146,13 +146,9 @@ class MediaRepository(object): media_length=content_length, user_id=auth_user, ) - media_info = { - "media_type": media_type, - "media_length": content_length, - } yield self._generate_thumbnails( - None, media_id, media_id, media_info["media_type"], + None, media_id, media_id, media_type, ) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -415,7 +411,7 @@ class MediaRepository(object): } yield self._generate_thumbnails( - server_name, media_id, file_id, media_info["media_type"], + server_name, media_id, file_id, media_type, ) defer.returnValue(media_info) From 5e97ca7ee61761f6826fd1c4220877dc787d88dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 16 Jan 2018 16:52:31 +0000 Subject: [PATCH 070/348] fix typo --- synapse/config/tls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 4748f71c2f..29eb012ddb 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -96,7 +96,7 @@ class TlsConfig(Config): # certificates returned by this server match one of the fingerprints. # # Synapse automatically adds the fingerprint of its own certificate - # to the list. So if federation traffic is handle directly by synapse + # to the list. So if federation traffic is handled directly by synapse # then no modification to the list is required. 
# # If synapse is run behind a load balancer that handles the TLS then it From 8615f19d20f4a2048773b60ce840aab48f3e11b0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 16:17:24 +0000 Subject: [PATCH 071/348] rework runInteraction in terms of runConnection ... so that we can share the code --- synapse/storage/_base.py | 51 ++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index b971f0cb18..986617674c 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -291,33 +291,33 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" - current_context = LoggingContext.current_context() + """Starts a transaction on the database and runs a given function - start_time = time.time() * 1000 + Arguments: + desc (str): description of the transaction, for logging and metrics + func (func): callback function, which will be called with a + database transaction (twisted.enterprise.adbapi.Transaction) as + its first argument, followed by `args` and `kwargs`. + + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ + current_context = LoggingContext.current_context() after_callbacks = [] final_callbacks = [] def inner_func(conn, *args, **kwargs): - with LoggingContext("runInteraction") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) - - if self.database_engine.is_connection_closed(conn): - logger.debug("Reconnecting closed database connection") - conn.reconnect() - - current_context.copy_to(context) - return self._new_transaction( - conn, desc, after_callbacks, final_callbacks, current_context, - func, *args, **kwargs - ) + return self._new_transaction( + conn, desc, after_callbacks, final_callbacks, current_context, + func, *args, **kwargs + ) try: - with PreserveLoggingContext(): - result = yield self._db_pool.runWithConnection( - inner_func, *args, **kwargs - ) + result = yield self.runWithConnection(inner_func, *args, **kwargs) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -329,7 +329,18 @@ class SQLBaseStore(object): @defer.inlineCallbacks def runWithConnection(self, func, *args, **kwargs): - """Wraps the .runInteraction() method on the underlying db_pool.""" + """Wraps the .runWithConnection() method on the underlying db_pool. + + Arguments: + func (func): callback function, which will be called with a + database connection (twisted.enterprise.adbapi.Connection) as + its first argument, followed by `args` and `kwargs`. + args (list): positional args to pass to `func` + kwargs (dict): named args to pass to `func` + + Returns: + Deferred: The result of func + """ current_context = LoggingContext.current_context() start_time = time.time() * 1000 From 3d12d97415ac6d6a4ab8188af31c7df12c5d19f8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 12 Jan 2018 00:27:14 +0000 Subject: [PATCH 072/348] Track DB scheduling delay per-request For each request, track the amount of time spent waiting for a db connection. This entails adding it to the LoggingContext and we may as well add metrics for it while we are passing. 
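The measurement itself is conceptually simple; a simplified sketch of the idea follows (the helper name and the db_pool/request_context parameters are illustrative, not the exact code in the diff below):

    import time

    def run_with_connection(db_pool, request_context, func, *args, **kwargs):
        # Note when the request asked for a database connection...
        start_time_ms = time.time() * 1000

        def inner_func(conn, *inner_args, **inner_kwargs):
            # ...and when a pool thread actually picked the job up. The
            # difference is the scheduling delay, which we attribute to the
            # request's logcontext so it can be logged and exported.
            sched_duration_ms = time.time() * 1000 - start_time_ms
            request_context.add_database_scheduled(sched_duration_ms)
            return func(conn, *inner_args, **inner_kwargs)

        return db_pool.runWithConnection(inner_func, *args, **kwargs)

The accumulated per-request total is then reported both in the request log line and via the new response_db_sched_duration_seconds counter.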
--- synapse/http/server.py | 7 +++++++ synapse/http/site.py | 4 +++- synapse/storage/_base.py | 4 +++- synapse/util/logcontext.py | 18 +++++++++++++++++- synapse/util/metrics.py | 14 +++++++++++++- 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 0f30e6fd56..7b6418bc2c 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, when processing this request +response_db_sched_duration = metrics.register_counter( + "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +) _next_request_id = 0 @@ -381,6 +385,9 @@ class RequestMetrics(object): response_db_txn_duration.inc_by( context.db_txn_duration_ms / 1000., request.method, self.name, tag ) + response_db_sched_duration.inc_by( + context.db_sched_duration_ms / 1000., request.method, self.name, tag + ) class RootRedirect(resource.Resource): diff --git a/synapse/http/site.py b/synapse/http/site.py index dc64f0f6f5..e422c8dfae 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -67,13 +67,14 @@ class SynapseRequest(Request): ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms + db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%d)" + " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, @@ -81,6 +82,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), + db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 986617674c..68125006eb 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -347,7 +347,9 @@ class SQLBaseStore(object): def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) + sched_duration_ms = time.time() * 1000 - start_time + sql_scheduling_timer.inc_by(sched_duration_ms) + current_context.add_database_scheduled(sched_duration_ms) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a78e53812f..94fa7cac98 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,8 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -86,6 +87,9 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): pass + def add_database_scheduled(self, sched_ms): + pass + def __nonzero__(self): return False @@ -101,6 +105,9 @@ class LoggingContext(object): # ms spent waiting for db txns, excluding scheduling time self.db_txn_duration_ms = 0 + # ms spent waiting for db txns to be scheduled + self.db_sched_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = 
threading.current_thread() @@ -210,6 +217,15 @@ class LoggingContext(object): self.db_txn_count += 1 self.db_txn_duration_ms += duration_ms + def add_database_scheduled(self, sched_ms): + """Record a use of the database pool + + Args: + sched_ms (int): number of milliseconds it took us to get a + connection + """ + self.db_sched_duration_ms += sched_ms + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index d25629cc50..059bb7fedf 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, in this block +block_db_sched_duration = metrics.register_counter( + "block_db_sched_duration_seconds", labels=["block_name"], +) + def measure_func(name): def wrapper(func): @@ -96,7 +101,9 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" + "ru_stime", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "created_context", ] def __init__(self, clock, name): @@ -117,6 +124,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count self.db_txn_duration_ms = self.start_context.db_txn_duration_ms + self.db_sched_duration_ms = self.start_context.db_sched_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -149,6 +157,10 @@ class Measure(object): (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., self.name ) + block_db_sched_duration.inc_by( + (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., + self.name + ) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) From 936482d507ffbf59d9aee58f851ec6ff2a120424 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 17:58:16 +0000 Subject: [PATCH 073/348] Fix 'NoneType' object has no attribute 'writeHeaders' Avoid throwing a (harmless) exception when we try to write an error response to an http request where the client has disconnected. This comes up as a CRITICAL error in the logs which tends to mislead people into thinking there's an actual problem --- synapse/http/server.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 269b65ca41..8077e22794 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -316,15 +316,6 @@ class JsonResource(HttpServer, resource.Resource): def _send_response(self, request, code, response_json_object, response_code_message=None): - # could alternatively use request.notifyFinish() and flip a flag when - # the Deferred fires, but since the flag is RIGHT THERE it seems like - # a waste. - if request._disconnected: - logger.warn( - "Not sending response to request %s, already disconnected.", - request) - return - outgoing_responses_counter.inc(request.method, str(code)) # TODO: Only enable CORS for the requests that need it. 
@@ -400,6 +391,15 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False, version_string="", canonical_json=True): + # could alternatively use request.notifyFinish() and flip a flag when + # the Deferred fires, but since the flag is RIGHT THERE it seems like + # a waste. + if request._disconnected: + logger.warn( + "Not sending response to request %s, already disconnected.", + request) + return + if pretty_print: json_bytes = encode_pretty_printed_json(json_object) + "\n" else: From d728c47142019da2896bba7a84ac92e9959fd7af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 10:06:14 +0000 Subject: [PATCH 074/348] Add docstring --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 22f86781f7..97c82c150e 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -532,7 +532,7 @@ class MediaRepository(object): media_id (str): The media ID of the content. (This is the same as the file_id for local content) file_id (str): Local file ID - media_type (str) + media_type (str): The content type of the file url_cache (bool): If we are thumbnailing images downloaded for the URL cache, used exclusively by the url previewer From 05f98a22249974ce40a461d12da93af0bc624319 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 12 Jan 2018 16:42:43 +0000 Subject: [PATCH 075/348] Keep track of last access time for local media --- synapse/rest/media/v1/media_repository.py | 32 +++++++++++++++---- synapse/storage/media_repository.py | 23 +++++++++++-- synapse/storage/prepare_database.py | 2 +- .../schema/delta/47/last_access_media.sql | 19 +++++++++++ 4 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 synapse/storage/schema/delta/47/last_access_media.sql diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..b2c76440b7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -53,7 +53,7 @@ import urlparse logger = logging.getLogger(__name__) -UPDATE_RECENTLY_ACCESSED_REMOTES_TS = 60 * 1000 +UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 class MediaRepository(object): @@ -75,6 +75,7 @@ class MediaRepository(object): self.remote_media_linearizer = Linearizer(name="media_remote") self.recently_accessed_remotes = set() + self.recently_accessed_locals = set() # List of StorageProviders where we should search for media and # potentially upload to. @@ -99,19 +100,34 @@ class MediaRepository(object): ) self.clock.looping_call( - self._update_recently_accessed_remotes, - UPDATE_RECENTLY_ACCESSED_REMOTES_TS + self._update_recently_accessed, + UPDATE_RECENTLY_ACCESSED_TS, ) @defer.inlineCallbacks - def _update_recently_accessed_remotes(self): - media = self.recently_accessed_remotes + def _update_recently_accessed(self): + remote_media = self.recently_accessed_remotes self.recently_accessed_remotes = set() + local_media = self.recently_accessed_locals + self.recently_accessed_locals = set() + yield self.store.update_cached_last_access_time( - media, self.clock.time_msec() + local_media, remote_media, self.clock.time_msec() ) + def mark_recently_accessed(self, server_name, media_id): + """Mark the given media as recently accessed. 
+ + Args: + server_name (str|None): Origin server of media, or None if local + media_id (str): The media ID of the content + """ + if server_name: + self.recently_accessed_remotes.add((server_name, media_id)) + else: + self.recently_accessed_locals.add(media_id) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): @@ -173,6 +189,8 @@ class MediaRepository(object): respond_404(request) return + self.mark_recently_accessed(None, media_id) + media_type = media_info["media_type"] media_length = media_info["media_length"] upload_name = name if name else media_info["upload_name"] @@ -204,7 +222,7 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ - self.recently_accessed_remotes.add((server_name, media_id)) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote # media multiple times concurrently diff --git a/synapse/storage/media_repository.py b/synapse/storage/media_repository.py index 6ebc372498..e6cdbb0545 100644 --- a/synapse/storage/media_repository.py +++ b/synapse/storage/media_repository.py @@ -173,7 +173,14 @@ class MediaRepositoryStore(BackgroundUpdateStore): desc="store_cached_remote_media", ) - def update_cached_last_access_time(self, origin_id_tuples, time_ts): + def update_cached_last_access_time(self, local_media, remote_media, time_ms): + """Updates the last access time of the given media + + Args: + local_media (iterable[str]): Set of media_ids + remote_media (iterable[(str, str)]): Set of (server_name, media_id) + time_ms: Current time in milliseconds + """ def update_cache_txn(txn): sql = ( "UPDATE remote_media_cache SET last_access_ts = ?" @@ -181,8 +188,18 @@ class MediaRepositoryStore(BackgroundUpdateStore): ) txn.executemany(sql, ( - (time_ts, media_origin, media_id) - for media_origin, media_id in origin_id_tuples + (time_ms, media_origin, media_id) + for media_origin, media_id in remote_media + )) + + sql = ( + "UPDATE local_media_repository SET last_access_ts = ?" + " WHERE media_id = ?" + ) + + txn.executemany(sql, ( + (time_ms, media_id) + for media_id in local_media )) return self.runInteraction("update_cached_last_access_time", update_cache_txn) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index d1691bbac2..c845a0cec5 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 46 +SCHEMA_VERSION = 47 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql new file mode 100644 index 0000000000..bc754ac861 --- /dev/null +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -0,0 +1,19 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- drop the unique constraint on deleted_pushers so that we can just insert +-- into it rather than upserting. + +ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; From 300edc23482fbc637a64a9e1cc1235a1fa7f9562 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 11:53:04 +0000 Subject: [PATCH 076/348] Update last access time when thumbnails are viewed --- synapse/rest/media/v1/thumbnail_resource.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843b..c09f2dec41 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,6 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(server_name, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -78,6 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) + self.media_repo.mark_recently_accessed(None, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, From a5213df1f7448c1810e6ae842448c81abe57ba69 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 16 Jan 2018 18:25:28 +0000 Subject: [PATCH 077/348] Sanity checking for user ids Check the user_id passed to a couple of APIs for validity, to avoid "IndexError: list index out of range" exception which looks scary and results in a 500 rather than a more useful error. Fixes #1432, among other things --- synapse/handlers/devicemessage.py | 14 +++++++++++--- synapse/handlers/e2e_keys.py | 13 ++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index f7fad15c62..d996aa90bb 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -17,7 +17,8 @@ import logging from twisted.internet import defer -from synapse.types import get_domain_from_id +from synapse.api.errors import SynapseError +from synapse.types import get_domain_from_id, UserID from synapse.util.stringutils import random_string @@ -33,7 +34,7 @@ class DeviceMessageHandler(object): """ self.store = hs.get_datastore() self.notifier = hs.get_notifier() - self.is_mine_id = hs.is_mine_id + self.is_mine = hs.is_mine self.federation = hs.get_federation_sender() hs.get_replication_layer().register_edu_handler( @@ -52,6 +53,12 @@ class DeviceMessageHandler(object): message_type = content["type"] message_id = content["message_id"] for user_id, by_device in content["messages"].items(): + # we use UserID.from_string to catch invalid user ids + if not self.is_mine(UserID.from_string(user_id)): + logger.warning("Request for keys for non-local user %s", + user_id) + raise SynapseError(400, "Not a user here") + messages_by_device = { device_id: { "content": message_content, @@ -77,7 +84,8 @@ class DeviceMessageHandler(object): local_messages = {} remote_messages = {} for user_id, by_device in messages.items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): messages_by_device = { device_id: { "content": message_content, diff --git a/synapse/handlers/e2e_keys.py 
b/synapse/handlers/e2e_keys.py index 668a90e495..5af8abf66b 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -20,7 +20,7 @@ from canonicaljson import encode_canonical_json from twisted.internet import defer from synapse.api.errors import SynapseError, CodeMessageException -from synapse.types import get_domain_from_id +from synapse.types import get_domain_from_id, UserID from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -32,7 +32,7 @@ class E2eKeysHandler(object): self.store = hs.get_datastore() self.federation = hs.get_replication_layer() self.device_handler = hs.get_device_handler() - self.is_mine_id = hs.is_mine_id + self.is_mine = hs.is_mine self.clock = hs.get_clock() # doesn't really work as part of the generic query API, because the @@ -70,7 +70,8 @@ class E2eKeysHandler(object): remote_queries = {} for user_id, device_ids in device_keys_query.items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): local_query[user_id] = device_ids else: remote_queries[user_id] = device_ids @@ -170,7 +171,8 @@ class E2eKeysHandler(object): result_dict = {} for user_id, device_ids in query.items(): - if not self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if not self.is_mine(UserID.from_string(user_id)): logger.warning("Request for keys for non-local user %s", user_id) raise SynapseError(400, "Not a user here") @@ -213,7 +215,8 @@ class E2eKeysHandler(object): remote_queries = {} for user_id, device_keys in query.get("one_time_keys", {}).items(): - if self.is_mine_id(user_id): + # we use UserID.from_string to catch invalid user ids + if self.is_mine(UserID.from_string(user_id)): for device_id, algorithm in device_keys.items(): local_query.append((user_id, device_id, algorithm)) else: From 2fb3a28c9894f2cd1ed2ba3404a519d0bbd754cc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 14:59:44 +0000 Subject: [PATCH 078/348] Remove lost comment --- synapse/storage/schema/delta/47/last_access_media.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/schema/delta/47/last_access_media.sql b/synapse/storage/schema/delta/47/last_access_media.sql index bc754ac861..f505fb22b5 100644 --- a/synapse/storage/schema/delta/47/last_access_media.sql +++ b/synapse/storage/schema/delta/47/last_access_media.sql @@ -13,7 +13,4 @@ * limitations under the License. */ --- drop the unique constraint on deleted_pushers so that we can just insert --- into it rather than upserting. - ALTER TABLE local_media_repository ADD COLUMN last_access_ts BIGINT; From 390093d45e1951b1a1d8a034667d2e84b3bf064d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 15:44:31 +0000 Subject: [PATCH 079/348] Split resolve_events into two functions ... 
so that the return type doesn't depend on the arg types --- synapse/state.py | 45 +++++++++++++++++++++++---------------- synapse/storage/events.py | 4 ++-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 9e624b4937..1f9abf9d3d 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -341,7 +341,7 @@ class StateHandler(object): if conflicted_state: logger.info("Resolving conflicted state for %r", room_id) with Measure(self.clock, "state._resolve_events"): - new_state = yield resolve_events( + new_state = yield resolve_events_with_factory( state_groups_ids.values(), state_map_factory=lambda ev_ids: self.store.get_events( ev_ids, get_prev_content=False, check_redacted=False, @@ -404,7 +404,7 @@ class StateHandler(object): } with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events(state_set_ids, state_map) + new_state = resolve_events_with_state_map(state_set_ids, state_map) new_state = { key: state_map[ev_id] for key, ev_id in new_state.items() @@ -420,19 +420,17 @@ def _ordered_events(events): return sorted(events, key=key_func) -def resolve_events(state_sets, state_map_factory): +def resolve_events_with_state_map(state_sets, state_map): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. - state_map_factory(dict|callable): If callable, then will be called - with a list of event_ids that are needed, and should return with - a Deferred of dict of event_id to event. Otherwise, should be - a dict from event_id to event of all events in state_sets. + state_map(dict): a dict from event_id to event, for all events in + state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent] is a map from - (type, state_key) to event. + dict[(str, str), synapse.events.FrozenEvent]: + a map from (type, state_key) to event. """ if len(state_sets) == 1: return state_sets[0] @@ -441,13 +439,6 @@ def resolve_events(state_sets, state_map_factory): state_sets, ) - if callable(state_map_factory): - return _resolve_with_state_fac( - unconflicted_state, conflicted_state, state_map_factory - ) - - state_map = state_map_factory - auth_events = _create_auth_events_from_maps( unconflicted_state, conflicted_state, state_map ) @@ -491,8 +482,26 @@ def _seperate(state_sets): @defer.inlineCallbacks -def _resolve_with_state_fac(unconflicted_state, conflicted_state, - state_map_factory): +def resolve_events_with_factory(state_sets, state_map_factory): + """ + Args: + state_sets(list): List of dicts of (type, state_key) -> event_id, + which are the different state groups to resolve. + state_map_factory(func): will be called + with a list of event_ids that are needed, and should return with + a Deferred of dict of event_id to event. + + Returns + Deferred[dict[(str, str), synapse.events.FrozenEvent]]: + a map from (type, state_key) to event. 
+ """ + if len(state_sets) == 1: + defer.returnValue(state_sets[0]) + + unconflicted_state, conflicted_state = _seperate( + state_sets, + ) + needed_events = set( event_id for event_ids in conflicted_state.itervalues() diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ad1d782705..c5292a5311 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events +from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -557,7 +557,7 @@ class EventsStore(SQLBaseStore): to_return.update(evs) defer.returnValue(to_return) - current_state = yield resolve_events( + current_state = yield resolve_events_with_factory( state_sets, state_map_factory=get_events, ) From 2d9ab533f9faa3f98eea166b05d3a0fb7fc2f80c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 17 Jan 2018 15:58:52 +0000 Subject: [PATCH 080/348] fix SQL when searching all users --- synapse/storage/user_directory.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index c9bff408ef..f150ef0103 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,8 +641,13 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - join_clause = "" - where_clause = "?<>''" # naughty hack to keep the same number of binds + # dummy to keep the number of binds & aliases the same + join_clause = """ + LEFT JOIN ( + SELECT NULL as user_id WHERE NULL = ? + ) AS s USING (user_id)" + """ + where_clause = "" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) From bc67e7d260631d3fa7bc78653376e15dc0771364 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:43:03 +0000 Subject: [PATCH 081/348] Add decent impl of a FileConsumer Twisted core doesn't have a general purpose one, so we need to write one ourselves. Features: - All writing happens in background thread - Supports both push and pull producers - Push producers get paused if the consumer falls behind --- synapse/util/file_consumer.py | 158 +++++++++++++++++++++++++++++++ tests/util/test_file_consumer.py | 138 +++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 synapse/util/file_consumer.py create mode 100644 tests/util/test_file_consumer.py diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py new file mode 100644 index 0000000000..de478fcb3e --- /dev/null +++ b/synapse/util/file_consumer.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vecotr Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.internet import defer, threads, reactor + +from synapse.util.logcontext import make_deferred_yieldable + +import Queue + + +class BackgroundFileConsumer(object): + """A consumer that writes to a file like object. Supports both push + and pull producers + + Args: + file_obj (file): The file like object to write to. Closed when + finished. + """ + + # For PushProducers pause if we have this many unwritten slices + _PAUSE_ON_QUEUE_SIZE = 5 + # And resume once the size of the queue is less than this + _RESUME_ON_QUEUE_SIZE = 2 + + def __init__(self, file_obj): + self.file_obj = file_obj + + # Producer we're registered with + self.producer = None + + # True if PushProducer, false if PullProducer + self.streaming = False + + # Queue of slices of bytes to be written. When producer calls + # unregister a final None is sent. + self.bytes_queue = Queue.Queue() + + # Deferred that is resolved when finished writing + self.finished_deferred = None + + # If the _writer thread throws an exception it gets stored here. + self._write_exception = None + + # A deferred that gets resolved when the bytes_queue gets empty. + # Mainly used for tests. + self._notify_empty_deferred = None + + def registerProducer(self, producer, streaming): + """Part of IProducer interface + + Args: + producer (IProducer) + streaming (bool): True if push based producer, False if pull + based. + """ + self.producer = producer + self.streaming = streaming + self.finished_deferred = threads.deferToThread(self._writer) + if not streaming: + self.producer.resumeProducing() + + self.paused_producer = False + + def unregisterProducer(self): + """Part of IProducer interface + """ + self.producer = None + if not self.finished_deferred.called: + self.bytes_queue.put_nowait(None) + + def write(self, bytes): + """Part of IProducer interface + """ + if self._write_exception: + raise self._write_exception + + if self.finished_deferred.called: + raise Exception("consumer has closed") + + self.bytes_queue.put_nowait(bytes) + + # If this is a pushed based consumer and the queue is getting behind + # then we pause the producer. + if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self.paused_producer = True + self.producer.pauseProducing() + + def _writer(self): + """This is run in a background thread to write to the file. + """ + try: + while self.producer or not self.bytes_queue.empty(): + # If we've paused the producer check if we should resume the + # producer. + if self.producer and self.paused_producer: + if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + reactor.callFromThread(self._resume_paused_producer) + + if self._notify_empty and self.bytes_queue.empty(): + reactor.callFromThread(self._notify_empty) + + bytes = self.bytes_queue.get() + + # If we get a None (or empty list) then that's a signal used + # to indicate we should check if we should stop. + if bytes: + self.file_obj.write(bytes) + + # If its a pull producer then we need to explicitly ask for + # more stuff. 
+ if not self.streaming and self.producer: + reactor.callFromThread(self.producer.resumeProducing) + except Exception as e: + self._write_exception = e + raise + finally: + self.file_obj.close() + + def wait(self): + """Returns a deferred that resolves when finished writing to file + """ + return make_deferred_yieldable(self.finished_deferred) + + def _resume_paused_producer(self): + """Gets called if we should resume producing after being paused + """ + if self.paused_producer and self.producer: + self.paused_producer = False + self.producer.resumeProducing() + + def _notify_empty(self): + """Called when the _writer thread thinks the queue may be empty and + we should notify anything waiting on `wait_for_writes` + """ + if self._notify_empty_deferred and self.bytes_queue.empty(): + d = self._notify_empty_deferred + self._notify_empty_deferred = None + d.callback(None) + + def wait_for_writes(self): + """Wait for the write queue to be empty and for writes to have + finished. This is mainly useful for tests. + """ + if not self._notify_empty_deferred: + self._notify_empty_deferred = defer.Deferred() + return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py new file mode 100644 index 0000000000..8acb68f0c3 --- /dev/null +++ b/tests/util/test_file_consumer.py @@ -0,0 +1,138 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
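Before the unit tests that follow, a minimal usage sketch of the consumer added above, driven by twisted's FileSender (a pull producer). The helper function and file paths are assumptions made for this example rather than part of the patch; PATCH 092 later in this series wires the consumer up in much the same way inside the media storage code.

    # Illustrative sketch only: stream a file to disk without blocking the
    # reactor thread. Paths and the helper name are invented for this example.
    from twisted.internet import defer
    from twisted.protocols.basic import FileSender

    from synapse.util.file_consumer import BackgroundFileConsumer


    @defer.inlineCallbacks
    def copy_to_disk(source_path, dest_path):
        consumer = BackgroundFileConsumer(open(dest_path, "wb"))
        try:
            # FileSender is a pull producer: after each chunk is written the
            # consumer's background thread asks it (via the reactor) for more.
            yield FileSender().beginFileTransfer(open(source_path, "rb"), consumer)
        finally:
            consumer.unregisterProducer()
        # Wait for the background thread to drain the queue and close the file.
        yield consumer.wait()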
+ + +from twisted.internet import defer +from mock import NonCallableMock + +from synapse.util.file_consumer import BackgroundFileConsumer + +from tests import unittest +from StringIO import StringIO + +import threading + + +class FileConsumerTests(unittest.TestCase): + + @defer.inlineCallbacks + def test_pull_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = DummyPullProducer() + + yield producer.register_with_consumer(consumer) + + yield producer.write_and_wait("Foo") + + self.assertEqual(string_file.getvalue(), "Foo") + + yield producer.write_and_wait("Bar") + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_consumer(self): + string_file = StringIO() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=[]) + + consumer.registerProducer(producer, True) + + consumer.write("Foo") + yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "Foo") + + consumer.write("Bar") + yield consumer.wait_for_writes() + + self.assertEqual(string_file.getvalue(), "FooBar") + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + @defer.inlineCallbacks + def test_push_producer_feedback(self): + string_file = BlockingStringWrite() + consumer = BackgroundFileConsumer(string_file) + + try: + producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + + consumer.registerProducer(producer, True) + + with string_file.write_lock: + for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): + consumer.write("Foo") + + producer.pauseProducing.assert_called_once() + + yield consumer.wait_for_writes() + producer.resumeProducing.assert_called_once() + finally: + consumer.unregisterProducer() + + yield consumer.wait() + + self.assertTrue(string_file.closed) + + +class DummyPullProducer(object): + def __init__(self): + self.consumer = None + self.deferred = defer.Deferred() + + def resumeProducing(self): + d = self.deferred + self.deferred = defer.Deferred() + d.callback(None) + + def write_and_wait(self, bytes): + d = self.deferred + self.consumer.write(bytes) + return d + + def register_with_consumer(self, consumer): + d = self.deferred + self.consumer = consumer + self.consumer.registerProducer(self, False) + return d + + +class BlockingStringWrite(object): + def __init__(self): + self.buffer = "" + self.closed = False + self.write_lock = threading.Lock() + + def write(self, bytes): + self.buffer += bytes + + def close(self): + self.closed = True From 1224612a798ce9f14f0d44e1246f87da15a959f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 17 Jan 2018 16:01:59 +0000 Subject: [PATCH 082/348] Log room when doing state resolution Mostly because it helps figure out what is prompting the resolution --- synapse/storage/events.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d08f7571d7..ba0da83642 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -528,6 +528,12 @@ class EventsStore(SQLBaseStore): # the events we have yet to persist, so we need a slightly more # complicated event lookup function than simply looking the events # up in the db. 
+ + logger.info( + "Resolving state for %s with %i state sets", + room_id, len(state_sets), + ) + events_map = {ev.event_id: ev for ev, _ in events_context} @defer.inlineCallbacks From a177325b49be4793c8ed21147f8d301a0649a2b6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:02:43 +0000 Subject: [PATCH 083/348] Fix comments --- synapse/util/file_consumer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index de478fcb3e..5284c7967e 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2018 New Vecotr Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ class BackgroundFileConsumer(object): self._notify_empty_deferred = None def registerProducer(self, producer, streaming): - """Part of IProducer interface + """Part of IConsumer interface Args: producer (IProducer) @@ -91,7 +91,7 @@ class BackgroundFileConsumer(object): self.bytes_queue.put_nowait(bytes) - # If this is a pushed based consumer and the queue is getting behind + # If this is a PushProducer and the queue is getting behind # then we pause the producer. if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: self.paused_producer = True From 28b338ed9bafc2017a635848e14a2a25b78d0016 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:04:41 +0000 Subject: [PATCH 084/348] Move definition of paused_producer to __init__ --- synapse/util/file_consumer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 5284c7967e..54c9da9573 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -43,6 +43,10 @@ class BackgroundFileConsumer(object): # True if PushProducer, false if PullProducer self.streaming = False + # For PushProducers, indicates whether we've paused the producer and + # need to call resumeProducing before we get more data. + self.paused_producer = False + # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. 
self.bytes_queue = Queue.Queue() @@ -71,8 +75,6 @@ class BackgroundFileConsumer(object): if not streaming: self.producer.resumeProducing() - self.paused_producer = False - def unregisterProducer(self): """Part of IProducer interface """ From 17b54389feb3855a33406149a8a59f0327bb3ad1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:05:34 +0000 Subject: [PATCH 085/348] Fix _notify_empty typo --- synapse/util/file_consumer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 54c9da9573..479e480614 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -110,7 +110,7 @@ class BackgroundFileConsumer(object): if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty and self.bytes_queue.empty(): + if self._notify_empty_deferred and self.bytes_queue.empty(): reactor.callFromThread(self._notify_empty) bytes = self.bytes_queue.get() From dc519602ac0f35d39a70c91f0e6057e865a61dfc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:07:17 +0000 Subject: [PATCH 086/348] Ensure we registerProducer isn't called twice --- synapse/util/file_consumer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 479e480614..d7bbb0aeb8 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -69,6 +69,9 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. """ + if self.producer: + raise Exception("registerProducer called twice") + self.producer = producer self.streaming = streaming self.finished_deferred = threads.deferToThread(self._writer) From ce236f8ac890842e105fee0df96c79f3d8ab8783 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 18 Jan 2018 11:30:49 +0000 Subject: [PATCH 087/348] better exception logging in callbackmetrics when we fail to render a metric, give a clue as to which metric it was --- synapse/metrics/metric.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index f480aae614..1e783e5ff4 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -15,6 +15,9 @@ from itertools import chain +import logging + +logger = logging.getLogger(__name__) def flatten(items): @@ -153,7 +156,11 @@ class CallbackMetric(BaseMetric): self.callback = callback def render(self): - value = self.callback() + try: + value = self.callback() + except Exception: + logger.exception("Failed to render %s", self.name) + return ["# FAILED to render " + self.name] if self.is_scalar(): return list(self._render_for_labels([], value)) From 2f18a2647b6b9cc07c3cc5f2bec3e1bab67d0eea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:10:12 +0000 Subject: [PATCH 088/348] Make all fields private --- synapse/util/file_consumer.py | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d7bbb0aeb8..d19d48665c 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -35,24 +35,24 @@ class BackgroundFileConsumer(object): _RESUME_ON_QUEUE_SIZE = 2 def __init__(self, file_obj): - self.file_obj = file_obj + self._file_obj = file_obj # Producer we're registered with - self.producer = None + self._producer = None # 
True if PushProducer, false if PullProducer self.streaming = False # For PushProducers, indicates whether we've paused the producer and # need to call resumeProducing before we get more data. - self.paused_producer = False + self._paused_producer = False # Queue of slices of bytes to be written. When producer calls # unregister a final None is sent. - self.bytes_queue = Queue.Queue() + self._bytes_queue = Queue.Queue() # Deferred that is resolved when finished writing - self.finished_deferred = None + self._finished_deferred = None # If the _writer thread throws an exception it gets stored here. self._write_exception = None @@ -69,21 +69,21 @@ class BackgroundFileConsumer(object): streaming (bool): True if push based producer, False if pull based. """ - if self.producer: + if self._producer: raise Exception("registerProducer called twice") - self.producer = producer + self._producer = producer self.streaming = streaming - self.finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = threads.deferToThread(self._writer) if not streaming: - self.producer.resumeProducing() + self._producer.resumeProducing() def unregisterProducer(self): """Part of IProducer interface """ - self.producer = None - if not self.finished_deferred.called: - self.bytes_queue.put_nowait(None) + self._producer = None + if not self._finished_deferred.called: + self._bytes_queue.put_nowait(None) def write(self, bytes): """Part of IProducer interface @@ -91,65 +91,65 @@ class BackgroundFileConsumer(object): if self._write_exception: raise self._write_exception - if self.finished_deferred.called: + if self._finished_deferred.called: raise Exception("consumer has closed") - self.bytes_queue.put_nowait(bytes) + self._bytes_queue.put_nowait(bytes) # If this is a PushProducer and the queue is getting behind # then we pause the producer. - if self.streaming and self.bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: - self.paused_producer = True - self.producer.pauseProducing() + if self.streaming and self._bytes_queue.qsize() >= self._PAUSE_ON_QUEUE_SIZE: + self._paused_producer = True + self._producer.pauseProducing() def _writer(self): """This is run in a background thread to write to the file. """ try: - while self.producer or not self.bytes_queue.empty(): + while self._producer or not self._bytes_queue.empty(): # If we've paused the producer check if we should resume the # producer. - if self.producer and self.paused_producer: - if self.bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: + if self._producer and self._paused_producer: + if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): reactor.callFromThread(self._notify_empty) - bytes = self.bytes_queue.get() + bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used # to indicate we should check if we should stop. if bytes: - self.file_obj.write(bytes) + self._file_obj.write(bytes) # If its a pull producer then we need to explicitly ask for # more stuff. 
- if not self.streaming and self.producer: - reactor.callFromThread(self.producer.resumeProducing) + if not self.streaming and self._producer: + reactor.callFromThread(self._producer.resumeProducing) except Exception as e: self._write_exception = e raise finally: - self.file_obj.close() + self._file_obj.close() def wait(self): """Returns a deferred that resolves when finished writing to file """ - return make_deferred_yieldable(self.finished_deferred) + return make_deferred_yieldable(self._finished_deferred) def _resume_paused_producer(self): """Gets called if we should resume producing after being paused """ - if self.paused_producer and self.producer: - self.paused_producer = False - self.producer.resumeProducing() + if self._paused_producer and self._producer: + self._paused_producer = False + self._producer.resumeProducing() def _notify_empty(self): """Called when the _writer thread thinks the queue may be empty and we should notify anything waiting on `wait_for_writes` """ - if self._notify_empty_deferred and self.bytes_queue.empty(): + if self._notify_empty_deferred and self._bytes_queue.empty(): d = self._notify_empty_deferred self._notify_empty_deferred = None d.callback(None) From 1432f7ccd5a01e43d0c5417f3d2f4a6a0fbf5bfb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:53:21 +0000 Subject: [PATCH 089/348] Move test stuff to tests --- synapse/util/file_consumer.py | 26 +-------------- tests/util/test_file_consumer.py | 54 +++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index d19d48665c..3241035247 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, threads, reactor +from twisted.internet import threads, reactor from synapse.util.logcontext import make_deferred_yieldable @@ -57,10 +57,6 @@ class BackgroundFileConsumer(object): # If the _writer thread throws an exception it gets stored here. self._write_exception = None - # A deferred that gets resolved when the bytes_queue gets empty. - # Mainly used for tests. - self._notify_empty_deferred = None - def registerProducer(self, producer, streaming): """Part of IConsumer interface @@ -113,9 +109,6 @@ class BackgroundFileConsumer(object): if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: reactor.callFromThread(self._resume_paused_producer) - if self._notify_empty_deferred and self._bytes_queue.empty(): - reactor.callFromThread(self._notify_empty) - bytes = self._bytes_queue.get() # If we get a None (or empty list) then that's a signal used @@ -144,20 +137,3 @@ class BackgroundFileConsumer(object): if self._paused_producer and self._producer: self._paused_producer = False self._producer.resumeProducing() - - def _notify_empty(self): - """Called when the _writer thread thinks the queue may be empty and - we should notify anything waiting on `wait_for_writes` - """ - if self._notify_empty_deferred and self._bytes_queue.empty(): - d = self._notify_empty_deferred - self._notify_empty_deferred = None - d.callback(None) - - def wait_for_writes(self): - """Wait for the write queue to be empty and for writes to have - finished. This is mainly useful for tests. 
- """ - if not self._notify_empty_deferred: - self._notify_empty_deferred = defer.Deferred() - return self._notify_empty_deferred diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py index 8acb68f0c3..76e2234255 100644 --- a/tests/util/test_file_consumer.py +++ b/tests/util/test_file_consumer.py @@ -14,7 +14,7 @@ # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, reactor from mock import NonCallableMock from synapse.util.file_consumer import BackgroundFileConsumer @@ -53,7 +53,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_push_consumer(self): - string_file = StringIO() + string_file = BlockingStringWrite() consumer = BackgroundFileConsumer(string_file) try: @@ -62,14 +62,14 @@ class FileConsumerTests(unittest.TestCase): consumer.registerProducer(producer, True) consumer.write("Foo") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(1) - self.assertEqual(string_file.getvalue(), "Foo") + self.assertEqual(string_file.buffer, "Foo") consumer.write("Bar") - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(2) - self.assertEqual(string_file.getvalue(), "FooBar") + self.assertEqual(string_file.buffer, "FooBar") finally: consumer.unregisterProducer() @@ -85,15 +85,22 @@ class FileConsumerTests(unittest.TestCase): try: producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) + resume_deferred = defer.Deferred() + producer.resumeProducing.side_effect = lambda: resume_deferred.callback(None) + consumer.registerProducer(producer, True) + number_writes = 0 with string_file.write_lock: for _ in range(consumer._PAUSE_ON_QUEUE_SIZE): consumer.write("Foo") + number_writes += 1 producer.pauseProducing.assert_called_once() - yield consumer.wait_for_writes() + yield string_file.wait_for_n_writes(number_writes) + + yield resume_deferred producer.resumeProducing.assert_called_once() finally: consumer.unregisterProducer() @@ -131,8 +138,39 @@ class BlockingStringWrite(object): self.closed = False self.write_lock = threading.Lock() + self._notify_write_deferred = None + self._number_of_writes = 0 + def write(self, bytes): - self.buffer += bytes + with self.write_lock: + self.buffer += bytes + self._number_of_writes += 1 + + reactor.callFromThread(self._notify_write) def close(self): self.closed = True + + def _notify_write(self): + "Called by write to indicate a write happened" + with self.write_lock: + if not self._notify_write_deferred: + return + d = self._notify_write_deferred + self._notify_write_deferred = None + d.callback(None) + + @defer.inlineCallbacks + def wait_for_n_writes(self, n): + "Wait for n writes to have happened" + while True: + with self.write_lock: + if n <= self._number_of_writes: + return + + if not self._notify_write_deferred: + self._notify_write_deferred = defer.Deferred() + + d = self._notify_write_deferred + + yield d From be0dfcd4a29859f4c707c2b3cf1da38c5115d251 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 11:57:23 +0000 Subject: [PATCH 090/348] Do logcontexts correctly --- synapse/util/file_consumer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 3241035247..90a2608d6f 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -15,7 +15,7 @@ from twisted.internet import threads, reactor -from synapse.util.logcontext import make_deferred_yieldable +from 
synapse.util.logcontext import make_deferred_yieldable, preserve_fn import Queue @@ -70,7 +70,7 @@ class BackgroundFileConsumer(object): self._producer = producer self.streaming = streaming - self._finished_deferred = threads.deferToThread(self._writer) + self._finished_deferred = preserve_fn(threads.deferToThread)(self._writer) if not streaming: self._producer.resumeProducing() From 4a53f3a3e8fb7fe9df052790858ca3f7dbff78f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 10:52:32 +0000 Subject: [PATCH 091/348] Ensure media is in local cache before thumbnailing --- synapse/rest/media/v1/media_repository.py | 20 ++++++++------- synapse/rest/media/v1/media_storage.py | 27 +++++++++++++++++++++ synapse/rest/media/v1/thumbnail_resource.py | 3 ++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..578d7f07c5 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -445,8 +445,10 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, - t_method, t_type): - input_path = self.filepaths.local_media_filepath(media_id) + t_method, t_type, url_cache): + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + None, media_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -459,6 +461,7 @@ class MediaRepository(object): file_info = FileInfo( server_name=None, file_id=media_id, + url_cache=url_cache, thumbnail=True, thumbnail_width=t_width, thumbnail_height=t_height, @@ -485,7 +488,9 @@ class MediaRepository(object): @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, t_width, t_height, t_method, t_type): - input_path = self.filepaths.remote_media_filepath(server_name, file_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=False, + )) thumbnailer = Thumbnailer(input_path) t_byte_source = yield make_deferred_yieldable(threads.deferToThread( @@ -543,12 +548,9 @@ class MediaRepository(object): if not requirements: return - if server_name: - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - elif url_cache: - input_path = self.filepaths.url_cache_filepath(media_id) - else: - input_path = self.filepaths.local_media_filepath(media_id) + input_path = yield self.media_storage.ensure_media_is_in_local_cache(FileInfo( + server_name, file_id, url_cache=url_cache, + )) thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578e..d3f54594a2 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,6 +15,7 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender +from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder @@ -151,6 +152,32 @@ class MediaStorage(object): defer.returnValue(None) + @defer.inlineCallbacks + def ensure_media_is_in_local_cache(self, file_info): + """Ensures that the given file is in the local cache. Attempts to + download it from storage providers if it isn't. 
+ + Args: + file_info (FileInfo) + + Returns: + Deferred[str]: Full path to local file + """ + path = self._file_info_to_path(file_info) + local_path = os.path.join(self.local_media_directory, path) + if os.path.exists(local_path): + defer.returnValue(local_path) + + for provider in self.storage_providers: + res = yield provider.fetch(path, file_info) + if res: + with res: + with open(local_path, "w") as f: + res.write_to_consumer(FileConsumer(f)) + defer.returnValue(local_path) + + raise Exception("file could not be found") + def _file_info_to_path(self, file_info): """Converts file_info into a relative path. diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index 8c9653843b..49e4af514c 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -162,7 +162,8 @@ class ThumbnailResource(Resource): # Okay, so we generate one. file_path = yield self.media_repo.generate_local_exact_thumbnail( - media_id, desired_width, desired_height, desired_method, desired_type + media_id, desired_width, desired_height, desired_method, desired_type, + url_cache=media_info["url_cache"], ) if file_path: From 2cf6a7bc2068350d0701b156deb2bb3a6f0da88a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:23 +0000 Subject: [PATCH 092/348] Use better file consumer --- synapse/rest/media/v1/media_storage.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index d3f54594a2..5c3e4e5a65 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -15,10 +15,10 @@ from twisted.internet import defer, threads from twisted.protocols.basic import FileSender -from twisted.protocols.ftp import FileConsumer # This isn't FTP specific from ._base import Responder +from synapse.util.file_consumer import BackgroundFileConsumer from synapse.util.logcontext import make_deferred_yieldable import contextlib @@ -27,6 +27,7 @@ import logging import shutil import sys + logger = logging.getLogger(__name__) @@ -168,12 +169,17 @@ class MediaStorage(object): if os.path.exists(local_path): defer.returnValue(local_path) + dirname = os.path.dirname(local_path) + if not os.path.exists(dirname): + os.makedirs(dirname) + for provider in self.storage_providers: res = yield provider.fetch(path, file_info) if res: with res: - with open(local_path, "w") as f: - res.write_to_consumer(FileConsumer(f)) + consumer = BackgroundFileConsumer(open(local_path, "w")) + yield res.write_to_consumer(consumer) + yield consumer.wait() defer.returnValue(local_path) raise Exception("file could not be found") @@ -247,9 +253,8 @@ class FileResponder(Responder): def __init__(self, open_file): self.open_file = open_file - @defer.inlineCallbacks def write_to_consumer(self, consumer): - yield FileSender().beginFileTransfer(self.open_file, consumer) + return FileSender().beginFileTransfer(self.open_file, consumer) def __exit__(self, exc_type, exc_val, exc_tb): self.open_file.close() From d57765fc8a1b54dae001bbb97b2b529991292fbc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 18 Jan 2018 12:23:04 +0000 Subject: [PATCH 093/348] Fix bugs in block metrics ... 
which I introduced in #2785 --- synapse/util/metrics.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 059bb7fedf..e4b5687a4b 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) # total number of times we have hit this block -response_count = metrics.register_counter( +block_counter = metrics.register_counter( "block_count", labels=["block_name"], alternative_names=( @@ -76,7 +76,7 @@ block_db_txn_count = metrics.register_counter( block_db_txn_duration = metrics.register_counter( "block_db_txn_duration_seconds", labels=["block_name"], alternative_names=( - metrics.name_prefix + "_block_db_txn_count:total", + metrics.name_prefix + "_block_db_txn_duration:total", ), ) @@ -131,6 +131,8 @@ class Measure(object): return duration = self.clock.time_msec() - self.start + + block_counter.inc(self.name) block_timer.inc_by(duration, self.name) context = LoggingContext.current_context() From 0af5dc63a8d180a2b610c14ce415fdf9be96e2ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Jan 2018 15:44:08 +0000 Subject: [PATCH 094/348] Make storage providers more configurable --- synapse/config/repository.py | 83 ++++++++++++++++++++--- synapse/rest/media/v1/media_repository.py | 18 ++--- synapse/rest/media/v1/storage_provider.py | 28 +++++--- 3 files changed, 98 insertions(+), 31 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 6baa474931..81db0193fb 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -16,6 +16,8 @@ from ._base import Config, ConfigError from collections import namedtuple +from synapse.util.module_loader import load_module + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." @@ -36,6 +38,10 @@ ThumbnailRequirement = namedtuple( "ThumbnailRequirement", ["width", "height", "method", "media_type"] ) +MediaStorageProviderConfig = namedtuple( + "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) +) + def parse_thumbnail_requirements(thumbnail_sizes): """ Takes a list of dictionaries with "width", "height", and "method" keys @@ -73,16 +79,65 @@ class ContentRepositoryConfig(Config): self.media_store_path = self.ensure_directory(config["media_store_path"]) - self.backup_media_store_path = config.get("backup_media_store_path") - if self.backup_media_store_path: - self.backup_media_store_path = self.ensure_directory( + backup_media_store_path = config.get("backup_media_store_path") + if backup_media_store_path: + backup_media_store_path = self.ensure_directory( self.backup_media_store_path ) - self.synchronous_backup_media_store = config.get( + synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False ) + storage_providers = config.get("media_storage_providers", []) + + if backup_media_store_path: + if storage_providers: + raise ConfigError( + "Cannot use both 'backup_media_store_path' and 'storage_providers'" + ) + + storage_providers = [{ + "module": "file_system", + "store_local": True, + "store_synchronous": synchronous_backup_media_store, + "store_remote": True, + "config": { + "directory": backup_media_store_path, + } + }] + + # This is a list of config that can be used to create the storage + # providers. 
The entries are tuples of (Class, class_config, + # MediaStorageProviderConfig), where Class is the class of the provider, + # the class_config the config to pass to it, and + # MediaStorageProviderConfig are options for StorageProviderWrapper. + # + # We don't create the storage providers here as not all workers need + # them to be started. + self.media_storage_providers = [] + + for provider_config in storage_providers: + # We special case the module "file_system" so as not to need to + # expose FileStorageProviderBackend + if provider_config["module"] == "file_system": + provider_config["module"] = ( + "synapse.rest.media.v1.storage_provider" + ".FileStorageProviderBackend" + ) + + provider_class, parsed_config = load_module(provider_config) + + wrapper_config = MediaStorageProviderConfig( + provider_config.get("store_local", False), + provider_config.get("store_remote", False), + provider_config.get("store_synchronous", False), + ) + + self.media_storage_providers.append( + (provider_class, provider_config, wrapper_config,) + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -127,13 +182,19 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" - # A secondary directory where uploaded images and attachments are - # stored as a backup. - # backup_media_store_path: "%(media_store)s" - - # Whether to wait for successful write to backup media store before - # returning successfully. - # synchronous_backup_media_store: false + # Media storage providers allow media to be stored in different + # locations. + # media_storage_providers: + # - module: file_system + # # Whether to write new local files. + # store_local: false + # # Whether to write new remote media + # store_remote: false + # # Whether to block upload requests waiting for write to this + # # provider to complete + # store_synchronous: false + # config: + # directory: /mnt/some/other/directory # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 97c82c150e..4163c9e416 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -27,9 +27,7 @@ from .identicon_resource import IdenticonResource from .preview_url_resource import PreviewUrlResource from .filepath import MediaFilePaths from .thumbnailer import Thumbnailer -from .storage_provider import ( - StorageProviderWrapper, FileStorageProviderBackend, -) +from .storage_provider import StorageProviderWrapper from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient @@ -80,17 +78,13 @@ class MediaRepository(object): # potentially upload to. storage_providers = [] - # TODO: Move this into config and allow other storage providers to be - # defined. 
- if hs.config.backup_media_store_path: - backend = FileStorageProviderBackend( - self.primary_base_path, hs.config.backup_media_store_path, - ) + for clz, provider_config, wrapper_config in hs.config.media_storage_providers: + backend = clz(hs, provider_config) provider = StorageProviderWrapper( backend, - store=True, - store_synchronous=hs.config.synchronous_backup_media_store, - store_remote=True, + store_local=wrapper_config.store_local, + store_remote=wrapper_config.store_remote, + store_synchronous=wrapper_config.store_synchronous, ) storage_providers.append(provider) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 2ad602e101..0074d2d426 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -17,6 +17,7 @@ from twisted.internet import defer, threads from .media_storage import FileResponder +from synapse.config._base import Config from synapse.util.logcontext import preserve_fn import logging @@ -64,14 +65,14 @@ class StorageProviderWrapper(StorageProvider): Args: backend (StorageProvider) - store (bool): Whether to store new files or not. + store_local (bool): Whether to store new local files or not. store_synchronous (bool): Whether to wait for file to be successfully uploaded, or todo the upload in the backgroud. store_remote (bool): Whether remote media should be uploaded """ - def __init__(self, backend, store, store_synchronous, store_remote): + def __init__(self, backend, store_local, store_synchronous, store_remote): self.backend = backend - self.store = store + self.store_local = store_local self.store_synchronous = store_synchronous self.store_remote = store_remote @@ -97,13 +98,13 @@ class FileStorageProviderBackend(StorageProvider): """A storage provider that stores files in a directory on a filesystem. Args: - cache_directory (str): Base path of the local media repository - base_directory (str): Base path to store new files + hs (HomeServer) + config: The config returned by `parse_config`, i """ - def __init__(self, cache_directory, base_directory): - self.cache_directory = cache_directory - self.base_directory = base_directory + def __init__(self, hs, config): + self.cache_directory = hs.config.media_store_path + self.base_directory = config def store_file(self, path, file_info): """See StorageProvider.store_file""" @@ -125,3 +126,14 @@ class FileStorageProviderBackend(StorageProvider): backup_fname = os.path.join(self.base_directory, path) if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + + def parse_config(config): + """Called on startup to parse config supplied. This should parse + the config and raise if there is a problem. + + The returned value is passed into the constructor. + + In this case we only care about a single param, the directory, so lets + just pull that out. 
+ """ + return Config.ensure_directory(config["directory"]) From 9a89dae8c53140c74f968538f72a4045b6aca90d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 15:06:24 +0000 Subject: [PATCH 095/348] Fix typo in thumbnail resource causing access times to be incorrect --- synapse/rest/media/v1/thumbnail_resource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnail_resource.py b/synapse/rest/media/v1/thumbnail_resource.py index c09f2dec41..12e84a2b7c 100644 --- a/synapse/rest/media/v1/thumbnail_resource.py +++ b/synapse/rest/media/v1/thumbnail_resource.py @@ -67,7 +67,7 @@ class ThumbnailResource(Resource): yield self._respond_local_thumbnail( request, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(server_name, media_id) + self.media_repo.mark_recently_accessed(None, media_id) else: if self.dynamic_thumbnails: yield self._select_or_generate_remote_thumbnail( @@ -79,7 +79,7 @@ class ThumbnailResource(Resource): request, server_name, media_id, width, height, method, m_type ) - self.media_repo.mark_recently_accessed(None, media_id) + self.media_repo.mark_recently_accessed(server_name, media_id) @defer.inlineCallbacks def _respond_local_thumbnail(self, request, media_id, width, height, From ce4f66133e6d426dfd3323cef164703b2b79f6b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 17 Jan 2018 16:56:35 +0000 Subject: [PATCH 096/348] Add unit tests --- tests/rest/media/__init__.py | 14 ++++ tests/rest/media/v1/__init__.py | 14 ++++ tests/rest/media/v1/test_media_storage.py | 81 +++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 tests/rest/media/__init__.py create mode 100644 tests/rest/media/v1/__init__.py create mode 100644 tests/rest/media/v1/test_media_storage.py diff --git a/tests/rest/media/__init__.py b/tests/rest/media/__init__.py new file mode 100644 index 0000000000..a354d38ca8 --- /dev/null +++ b/tests/rest/media/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/rest/media/v1/__init__.py b/tests/rest/media/v1/__init__.py new file mode 100644 index 0000000000..a354d38ca8 --- /dev/null +++ b/tests/rest/media/v1/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
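The thumbnail call sites fixed in PATCH 095 above feed the in-memory recently_accessed_locals / recently_accessed_remotes sets kept by the media repository, and those sets are flushed in one batch into the storage method update_cached_last_access_time(local_media, remote_media, time_ms) shown earlier. The repository's periodic flush is not part of the excerpts above, so the following is only an assumed sketch of its shape:

    # Assumed sketch of the periodic flush on MediaRepository (not verbatim
    # from these patches); it swaps the sets out and writes them in one go.
    from twisted.internet import defer

    @defer.inlineCallbacks
    def _update_recently_accessed(self):
        remote_media = self.recently_accessed_remotes
        self.recently_accessed_remotes = set()

        local_media = self.recently_accessed_locals
        self.recently_accessed_locals = set()

        yield self.store.update_cached_last_access_time(
            local_media, remote_media, self.clock.time_msec(),
        )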
diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py new file mode 100644 index 0000000000..c4de181579 --- /dev/null +++ b/tests/rest/media/v1/test_media_storage.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from twisted.internet import defer + +from synapse.rest.media.v1._base import FileInfo +from synapse.rest.media.v1.media_storage import MediaStorage +from synapse.rest.media.v1.filepath import MediaFilePaths +from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend + +from tests import unittest + +import os +import shutil +import tempfile + + +class MediaStorageTests(unittest.TestCase): + def setUp(self): + self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-") + + self.primary_base_path = os.path.join(self.test_dir, "primary") + self.secondary_base_path = os.path.join(self.test_dir, "secondary") + + storage_providers = [FileStorageProviderBackend( + self.primary_base_path, self.secondary_base_path + )] + + self.filepaths = MediaFilePaths(self.primary_base_path) + self.media_storage = MediaStorage( + self.primary_base_path, self.filepaths, storage_providers, + ) + + def tearDown(self): + shutil.rmtree(self.test_dir) + + @defer.inlineCallbacks + def test_ensure_media_is_in_local_cache(self): + media_id = "some_media_id" + test_body = "Test\n" + + # First we create a file that is in a storage provider but not in the + # local primary media store + rel_path = self.filepaths.local_media_filepath_rel(media_id) + secondary_path = os.path.join(self.secondary_base_path, rel_path) + + os.makedirs(os.path.dirname(secondary_path)) + + with open(secondary_path, "w") as f: + f.write(test_body) + + # Now we run ensure_media_is_in_local_cache, which should copy the file + # to the local cache. 
+ file_info = FileInfo(None, media_id) + local_path = yield self.media_storage.ensure_media_is_in_local_cache(file_info) + + self.assertTrue(os.path.exists(local_path)) + + # Asserts the file is under the expected local cache directory + self.assertEquals( + os.path.commonprefix([self.primary_base_path, local_path]), + self.primary_base_path, + ) + + with open(local_path) as f: + body = f.read() + + self.assertEqual(test_body, body) From aae77da73ffc89c31d0b17fa8ce5d8b58605de63 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:20 +0000 Subject: [PATCH 097/348] Fixup comments --- synapse/config/repository.py | 6 +++++- synapse/rest/media/v1/storage_provider.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 81db0193fb..8bbc16ba40 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -39,7 +39,11 @@ ThumbnailRequirement = namedtuple( ) MediaStorageProviderConfig = namedtuple( - "MediaStorageProviderConfig", ("store_local", "store_remote", "store_synchronous",) + "MediaStorageProviderConfig", ( + "store_local", # Whether to store newly uploaded local files + "store_remote", # Whether to store newly downloaded remote files + "store_synchronous", # Whether to wait for successful storage for local uploads + ), ) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0074d2d426..9bf88f01f9 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -99,7 +99,7 @@ class FileStorageProviderBackend(StorageProvider): Args: hs (HomeServer) - config: The config returned by `parse_config`, i + config: The config returned by `parse_config`. """ def __init__(self, hs, config): @@ -133,7 +133,7 @@ class FileStorageProviderBackend(StorageProvider): The returned value is passed into the constructor. - In this case we only care about a single param, the directory, so lets + In this case we only care about a single param, the directory, so let's just pull that out. """ return Config.ensure_directory(config["directory"]) From 3fe2bae857cda58055c32329e15d3fe9828cf8f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:11:45 +0000 Subject: [PATCH 098/348] Missing staticmethod --- synapse/rest/media/v1/storage_provider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 9bf88f01f9..0a84aba861 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -127,6 +127,7 @@ class FileStorageProviderBackend(StorageProvider): if os.path.isfile(backup_fname): return FileResponder(open(backup_fname, "rb")) + @staticmethod def parse_config(config): """Called on startup to parse config supplied. This should parse the config and raise if there is a problem. 
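PATCHES 094-098 above make the media storage backends pluggable: each media_storage_providers entry names a module, a set of store_local / store_remote / store_synchronous flags consumed by StorageProviderWrapper, and a config block that is handed to the class's parse_config. As a rough sketch of what a third-party provider could look like (the module name, class, config key and behaviour are invented for illustration and simply mirror the file-system provider above):

    # my_module.py -- hypothetical provider following the same interface as
    # FileStorageProviderBackend: __init__(hs, config), store_file, fetch,
    # and a static parse_config.
    import os
    import shutil

    from twisted.internet import threads

    from synapse.rest.media.v1.media_storage import FileResponder
    from synapse.rest.media.v1.storage_provider import StorageProvider


    class ArchiveStorageProvider(StorageProvider):
        """Keeps a copy of media under a separate archive directory."""

        def __init__(self, hs, config):
            self.cache_directory = hs.config.media_store_path
            self.archive_directory = config

        def store_file(self, path, file_info):
            primary_fname = os.path.join(self.cache_directory, path)
            archive_fname = os.path.join(self.archive_directory, path)
            dirname = os.path.dirname(archive_fname)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            # Copy in a background thread, as the file-system provider does.
            return threads.deferToThread(shutil.copyfile, primary_fname, archive_fname)

        def fetch(self, path, file_info):
            archive_fname = os.path.join(self.archive_directory, path)
            if os.path.isfile(archive_fname):
                return FileResponder(open(archive_fname, "rb"))

        @staticmethod
        def parse_config(config):
            # Only one option is needed; a missing key should fail at startup.
            return config["directory"]

Such a provider would then be enabled with a config entry along these lines (values assumed):

    media_storage_providers:
      - module: my_module.ArchiveStorageProvider
        store_local: true
        store_remote: true
        store_synchronous: false
        config:
          directory: /mnt/media-archive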
From 8e85220373ee7a0396f36ffd3fddab8b1d6a7a12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:12:35 +0000 Subject: [PATCH 099/348] Remove duplicate directory test --- synapse/config/repository.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 8bbc16ba40..364e823cd3 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -84,10 +84,6 @@ class ContentRepositoryConfig(Config): self.media_store_path = self.ensure_directory(config["media_store_path"]) backup_media_store_path = config.get("backup_media_store_path") - if backup_media_store_path: - backup_media_store_path = self.ensure_directory( - self.backup_media_store_path - ) synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False From d69768348fc053dd6e243479acceabbdd6167238 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 17:14:05 +0000 Subject: [PATCH 100/348] Fix passing wrong config to provider constructor --- synapse/config/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 364e823cd3..25ea77738a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -135,7 +135,7 @@ class ContentRepositoryConfig(Config): ) self.media_storage_providers.append( - (provider_class, provider_config, wrapper_config,) + (provider_class, parsed_config, wrapper_config,) ) self.uploads_path = self.ensure_directory(config["uploads_path"]) From cd871a305708bca1acb5289f93cb62c15ba438da Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 18 Jan 2018 18:37:59 +0000 Subject: [PATCH 101/348] Fix storage provider bug introduced when renamed to store_local --- synapse/rest/media/v1/storage_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index 0a84aba861..c188192f2b 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -77,7 +77,7 @@ class StorageProviderWrapper(StorageProvider): self.store_remote = store_remote def store_file(self, path, file_info): - if not self.store: + if not file_info.server_name and not self.store_local: return defer.succeed(None) if file_info.server_name and not self.store_remote: From 28a6ccb49c57cc686761b9e674b501b3b402e616 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:19:58 +0000 Subject: [PATCH 102/348] add registrations_require_3pid lets homeservers specify a whitelist for 3PIDs that users are allowed to associate with. 
Typically useful for stopping people from registering with non-work emails --- synapse/api/errors.py | 1 + synapse/config/registration.py | 13 +++++ synapse/rest/client/v2_alpha/_base.py | 22 +++++++ synapse/rest/client/v2_alpha/account.py | 14 ++++- synapse/rest/client/v2_alpha/register.py | 73 ++++++++++++++++++++---- 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 79b35b3e7c..46b0d7b34c 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -46,6 +46,7 @@ class Codes(object): THREEPID_AUTH_FAILED = "M_THREEPID_AUTH_FAILED" THREEPID_IN_USE = "M_THREEPID_IN_USE" THREEPID_NOT_FOUND = "M_THREEPID_NOT_FOUND" + THREEPID_DENIED = "M_THREEPID_DENIED" INVALID_USERNAME = "M_INVALID_USERNAME" SERVER_NOT_TRUSTED = "M_SERVER_NOT_TRUSTED" diff --git a/synapse/config/registration.py b/synapse/config/registration.py index ef917fc9f2..e5e4f77872 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -31,6 +31,7 @@ class RegistrationConfig(Config): strtobool(str(config["disable_registration"])) ) + self.registrations_require_3pid = config.get("registrations_require_3pid", []) self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) @@ -52,6 +53,18 @@ class RegistrationConfig(Config): # Enable registration for new users. enable_registration: False + # Mandate that registrations require a 3PID which matches one or more + # of these 3PIDs. N.B. regexp escape backslashes are doubled (once for + # YAML and once for the regexp itself) + # + # registrations_require_3pid: + # - medium: email + # pattern: ".*@matrix\\.org" + # - medium: email + # pattern: ".*@vector\\.im" + # - medium: msisdn + # pattern: "\\+44" + # If set, allows registration by anyone who also has the shared # secret, even if registration is otherwise disabled. 
registration_shared_secret: "%(registration_shared_secret)s" diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index 77434937ff..7c46ef7cab 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -60,6 +60,28 @@ def set_timeline_upper_limit(filter_json, filter_timeline_limit): filter_timeline_limit) +def check_3pid_allowed(hs, medium, address): + # check whether the HS has whitelisted the given 3PID + + allow = False + if hs.config.registrations_require_3pid: + for constraint in hs.config.registrations_require_3pid: + logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( + address, medium, constraint['pattern'], constraint['medium'] + ) + ) + if ( + medium == constraint['medium'] and + re.match(constraint['pattern'], address) + ): + allow = True + break + else: + allow = True + + return allow + + def interactive_auth_handler(orig): """Wraps an on_POST method to handle InteractiveAuthIncompleteErrors diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 385a3ad2ec..66221e8f00 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -26,7 +26,7 @@ from synapse.http.servlet import ( ) from synapse.util.async import run_on_reactor from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler +from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed logger = logging.getLogger(__name__) @@ -47,6 +47,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): 'id_server', 'client_secret', 'email', 'send_attempt' ]) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] ) @@ -78,6 +81,9 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn ) @@ -217,6 +223,9 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if absent: raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'email', body['email'] ) @@ -255,6 +264,9 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn ) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index e9d88a8895..762782c1f0 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -27,9 +27,10 @@ from synapse.http.servlet import ( ) from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler +from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed import logging +import re import hmac from hashlib import sha1 from 
synapse.util.async import run_on_reactor @@ -70,6 +71,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): 'id_server', 'client_secret', 'email', 'send_attempt' ]) + if not check_3pid_allowed(self.hs, "email", body['email']): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] ) @@ -105,6 +109,9 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) + if not check_3pid_allowed(self.hs, "msisdn", msisdn): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'msisdn', msisdn ) @@ -305,31 +312,73 @@ class RegisterRestServlet(RestServlet): if 'x_show_msisdn' in body and body['x_show_msisdn']: show_msisdn = True + require_email = False + require_msisdn = False + for constraint in self.hs.config.registrations_require_3pid: + if constraint['medium'] == 'email': + require_email = True + elif constraint['medium'] == 'msisdn': + require_msisdn = True + else: + logger.warn( + "Unrecognised 3PID medium %s in registrations_require_3pid" % + constraint['medium'] + ) + + flows = [] if self.hs.config.enable_registration_captcha: - flows = [ - [LoginType.RECAPTCHA], - [LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], - ] + if not require_email and not require_msisdn: + flows.extend([[LoginType.RECAPTCHA]]) + if require_email or not require_msisdn: + flows.extend([[LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA]]) + if show_msisdn: + if not require_email or require_msisdn: + flows.extend([[LoginType.MSISDN, LoginType.RECAPTCHA]]) flows.extend([ - [LoginType.MSISDN, LoginType.RECAPTCHA], [LoginType.MSISDN, LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], ]) else: - flows = [ - [LoginType.DUMMY], - [LoginType.EMAIL_IDENTITY], - ] + if not require_email and not require_msisdn: + flows.extend([[LoginType.DUMMY]]) + if require_email or not require_msisdn: + flows.extend([[LoginType.EMAIL_IDENTITY]]) + if show_msisdn: + if not require_email or require_msisdn: + flows.extend([[LoginType.MSISDN]]) flows.extend([ - [LoginType.MSISDN], - [LoginType.MSISDN, LoginType.EMAIL_IDENTITY], + [LoginType.MSISDN, LoginType.EMAIL_IDENTITY] ]) auth_result, params, session_id = yield self.auth_handler.check_auth( flows, body, self.hs.get_ip_from_request(request) ) + # doublecheck that we're not trying to register an denied 3pid. 
+ # the user-facing checks should already have happened when we requested + # a 3PID token to validate them in /register/email/requestToken etc + + for constraint in self.hs.config.registrations_require_3pid: + if ( + constraint['medium'] == 'email' and + auth_result and LoginType.EMAIL_IDENTITY in auth_result and + re.match( + constraint['pattern'], + auth_result[LoginType.EMAIL_IDENTITY].threepid.address + ) + ): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + elif ( + constraint['medium'] == 'msisdn' and + auth_result and LoginType.MSISDN in auth_result and + re.match( + constraint['pattern'], + auth_result[LoginType.MSISDN].threepid.address + ) + ): + raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + if registered_user_id is not None: logger.info( "Already registered user ID %r for this session", From 81d037dbd8c6616b33339f198f70134f73bbff5f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:28:08 +0000 Subject: [PATCH 103/348] mock registrations_require_3pid --- tests/rest/client/v2_alpha/test_register.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 096f771bea..8aba456510 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -49,6 +49,7 @@ class RegisterRestServletTestCase(unittest.TestCase): self.hs.get_auth_handler = Mock(return_value=self.auth_handler) self.hs.get_device_handler = Mock(return_value=self.device_handler) self.hs.config.enable_registration = True + self.hs.config.registrations_require_3pid = [] self.hs.config.auto_join_rooms = [] # init the thing we're testing From 0af58f14ee351e7d52d7139df9218ff692764f20 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:33:51 +0000 Subject: [PATCH 104/348] fix pep8 --- synapse/rest/client/v2_alpha/_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index 7c46ef7cab..b286ff0d95 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -68,8 +68,7 @@ def check_3pid_allowed(hs, medium, address): for constraint in hs.config.registrations_require_3pid: logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( address, medium, constraint['pattern'], constraint['medium'] - ) - ) + )) if ( medium == constraint['medium'] and re.match(constraint['pattern'], address) From 9d332e0f797e4f302a08b3708df4ac8b42b08216 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 00:53:58 +0000 Subject: [PATCH 105/348] fix up v1, and improve errors --- synapse/handlers/register.py | 13 +++++++- synapse/rest/client/v1/register.py | 40 +++++++++++++++++------- synapse/rest/client/v2_alpha/account.py | 16 +++++++--- synapse/rest/client/v2_alpha/register.py | 16 +++++++--- 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 5b808beac1..157ebaf251 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -15,6 +15,7 @@ """Contains functions for registering clients.""" import logging +import re from twisted.internet import defer @@ -293,7 +294,7 @@ class RegistrationHandler(BaseHandler): """ for c in threepidCreds: - logger.info("validating theeepidcred sid %s on id server %s", + logger.info("validating threepidcred sid %s on id server %s", c['sid'], c['idServer']) try: 
identity_handler = self.hs.get_handlers().identity_handler @@ -307,6 +308,16 @@ class RegistrationHandler(BaseHandler): logger.info("got threepid with medium '%s' and address '%s'", threepid['medium'], threepid['address']) + for constraint in self.hs.config.registrations_require_3pid: + if ( + constraint['medium'] == 'email' and + threepid['medium'] == 'email' and + re.match(constraint['pattern'], threepid['address']) + ): + raise RegistrationError( + 403, "Third party identifier is not allowed" + ) + @defer.inlineCallbacks def bind_emails(self, user_id, threepidCreds): """Links emails with a user ID and informs an identity server. diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py index 32ed1d3ab2..f793542ad6 100644 --- a/synapse/rest/client/v1/register.py +++ b/synapse/rest/client/v1/register.py @@ -70,10 +70,24 @@ class RegisterRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() def on_GET(self, request): + + require_email = False + require_msisdn = False + for constraint in self.hs.config.registrations_require_3pid: + if constraint['medium'] == 'email': + require_email = True + elif constraint['medium'] == 'msisdn': + require_msisdn = True + else: + logger.warn( + "Unrecognised 3PID medium %s in registrations_require_3pid" % + constraint['medium'] + ) + + flows = [] if self.hs.config.enable_registration_captcha: - return ( - 200, - {"flows": [ + if require_email or not require_msisdn: + flows.extend([ { "type": LoginType.RECAPTCHA, "stages": [ @@ -82,27 +96,31 @@ class RegisterRestServlet(ClientV1RestServlet): LoginType.PASSWORD ] }, + ]) + if not require_email and not require_msisdn: + flows.extend([ { "type": LoginType.RECAPTCHA, "stages": [LoginType.RECAPTCHA, LoginType.PASSWORD] } - ]} - ) + ]) else: - return ( - 200, - {"flows": [ + if require_email or not require_msisdn: + flows.extend([ { "type": LoginType.EMAIL_IDENTITY, "stages": [ LoginType.EMAIL_IDENTITY, LoginType.PASSWORD ] - }, + } + ]) + if not require_email and not require_msisdn: + flows.extend([ { "type": LoginType.PASSWORD } - ]} - ) + ]) + return (200, {"flows": flows}) @defer.inlineCallbacks def on_POST(self, request): diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 66221e8f00..2977ad439f 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -48,7 +48,9 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): ]) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] @@ -82,7 +84,9 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn @@ -224,7 +228,9 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party 
identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'email', body['email'] @@ -265,7 +271,9 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.datastore.get_user_id_by_threepid( 'msisdn', msisdn diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 762782c1f0..898d8b133a 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -72,7 +72,9 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): ]) if not check_3pid_allowed(self.hs, "email", body['email']): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'email', body['email'] @@ -110,7 +112,9 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): msisdn = phone_number_to_msisdn(body['country'], body['phone_number']) if not check_3pid_allowed(self.hs, "msisdn", msisdn): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( 'msisdn', msisdn @@ -368,7 +372,9 @@ class RegisterRestServlet(RestServlet): auth_result[LoginType.EMAIL_IDENTITY].threepid.address ) ): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) elif ( constraint['medium'] == 'msisdn' and auth_result and LoginType.MSISDN in auth_result and @@ -377,7 +383,9 @@ class RegisterRestServlet(RestServlet): auth_result[LoginType.MSISDN].threepid.address ) ): - raise SynapseError(403, "3PID denied", Codes.THREEPID_DENIED) + raise SynapseError( + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + ) if registered_user_id is not None: logger.info( From 447f4f0d5f136dcadd5fdc286ded2d6e24a3f686 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 15:33:55 +0000 Subject: [PATCH 106/348] rewrite based on PR feedback: * [ ] split config options into allowed_local_3pids and registrations_require_3pid * [ ] simplify and comment logic for picking registration flows * [ ] fix docstring and move check_3pid_allowed into a new util module * [ ] use check_3pid_allowed everywhere @erikjohnston PTAL --- synapse/config/registration.py | 12 +++- synapse/handlers/register.py | 15 ++--- synapse/rest/client/v1/register.py | 20 +++---- synapse/rest/client/v2_alpha/_base.py | 21 ------- synapse/rest/client/v2_alpha/account.py | 3 +- synapse/rest/client/v2_alpha/register.py | 73 +++++++++++------------- synapse/util/threepids.py | 45 +++++++++++++++ 7 files changed, 101 insertions(+), 88 deletions(-) create mode 100644 synapse/util/threepids.py diff --git a/synapse/config/registration.py b/synapse/config/registration.py index e5e4f77872..336959094b 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -32,6 +32,7 @@ class RegistrationConfig(Config): ) self.registrations_require_3pid = 
config.get("registrations_require_3pid", []) + self.allowed_local_3pids = config.get("allowed_local_3pids", []) self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) @@ -53,11 +54,16 @@ class RegistrationConfig(Config): # Enable registration for new users. enable_registration: False - # Mandate that registrations require a 3PID which matches one or more - # of these 3PIDs. N.B. regexp escape backslashes are doubled (once for - # YAML and once for the regexp itself) + # The user must provide all of the below types of 3PID when registering. # # registrations_require_3pid: + # - email + # - msisdn + + # Mandate that users are only allowed to associate certain formats of + # 3PIDs with accounts on this server. + # + # allowed_local_3pids: # - medium: email # pattern: ".*@matrix\\.org" # - medium: email diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 157ebaf251..9021d4d57f 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -15,7 +15,6 @@ """Contains functions for registering clients.""" import logging -import re from twisted.internet import defer @@ -26,6 +25,7 @@ from synapse.http.client import CaptchaServerHttpClient from synapse import types from synapse.types import UserID from synapse.util.async import run_on_reactor +from synapse.util.threepids import check_3pid_allowed from ._base import BaseHandler logger = logging.getLogger(__name__) @@ -308,15 +308,10 @@ class RegistrationHandler(BaseHandler): logger.info("got threepid with medium '%s' and address '%s'", threepid['medium'], threepid['address']) - for constraint in self.hs.config.registrations_require_3pid: - if ( - constraint['medium'] == 'email' and - threepid['medium'] == 'email' and - re.match(constraint['pattern'], threepid['address']) - ): - raise RegistrationError( - 403, "Third party identifier is not allowed" - ) + if not check_3pid_allowed(self.hs, threepid['medium'], threepid['address']): + raise RegistrationError( + 403, "Third party identifier is not allowed" + ) @defer.inlineCallbacks def bind_emails(self, user_id, threepidCreds): diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py index f793542ad6..5c5fa8f7ab 100644 --- a/synapse/rest/client/v1/register.py +++ b/synapse/rest/client/v1/register.py @@ -71,22 +71,13 @@ class RegisterRestServlet(ClientV1RestServlet): def on_GET(self, request): - require_email = False - require_msisdn = False - for constraint in self.hs.config.registrations_require_3pid: - if constraint['medium'] == 'email': - require_email = True - elif constraint['medium'] == 'msisdn': - require_msisdn = True - else: - logger.warn( - "Unrecognised 3PID medium %s in registrations_require_3pid" % - constraint['medium'] - ) + require_email = 'email' in self.hs.config.registrations_require_3pid + require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid flows = [] if self.hs.config.enable_registration_captcha: - if require_email or not require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([ { "type": LoginType.RECAPTCHA, @@ -97,6 +88,7 @@ class RegisterRestServlet(ClientV1RestServlet): ] }, ]) + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([ { @@ -105,6 +97,7 @@ class RegisterRestServlet(ClientV1RestServlet): } ]) else: + # only support the email-only flow if we don't require MSISDN 3PIDs if 
require_email or not require_msisdn: flows.extend([ { @@ -114,6 +107,7 @@ class RegisterRestServlet(ClientV1RestServlet): ] } ]) + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([ { diff --git a/synapse/rest/client/v2_alpha/_base.py b/synapse/rest/client/v2_alpha/_base.py index b286ff0d95..77434937ff 100644 --- a/synapse/rest/client/v2_alpha/_base.py +++ b/synapse/rest/client/v2_alpha/_base.py @@ -60,27 +60,6 @@ def set_timeline_upper_limit(filter_json, filter_timeline_limit): filter_timeline_limit) -def check_3pid_allowed(hs, medium, address): - # check whether the HS has whitelisted the given 3PID - - allow = False - if hs.config.registrations_require_3pid: - for constraint in hs.config.registrations_require_3pid: - logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( - address, medium, constraint['pattern'], constraint['medium'] - )) - if ( - medium == constraint['medium'] and - re.match(constraint['pattern'], address) - ): - allow = True - break - else: - allow = True - - return allow - - def interactive_auth_handler(orig): """Wraps an on_POST method to handle InteractiveAuthIncompleteErrors diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 2977ad439f..514bb37da1 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -26,7 +26,8 @@ from synapse.http.servlet import ( ) from synapse.util.async import run_on_reactor from synapse.util.msisdn import phone_number_to_msisdn -from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed +from synapse.util.threepids import check_3pid_allowed +from ._base import client_v2_patterns, interactive_auth_handler logger = logging.getLogger(__name__) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 898d8b133a..c3479e29de 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -26,11 +26,11 @@ from synapse.http.servlet import ( RestServlet, parse_json_object_from_request, assert_params_in_request, parse_string ) from synapse.util.msisdn import phone_number_to_msisdn +from synapse.util.threepids import check_3pid_allowed -from ._base import client_v2_patterns, interactive_auth_handler, check_3pid_allowed +from ._base import client_v2_patterns, interactive_auth_handler import logging -import re import hmac from hashlib import sha1 from synapse.util.async import run_on_reactor @@ -316,41 +316,41 @@ class RegisterRestServlet(RestServlet): if 'x_show_msisdn' in body and body['x_show_msisdn']: show_msisdn = True - require_email = False - require_msisdn = False - for constraint in self.hs.config.registrations_require_3pid: - if constraint['medium'] == 'email': - require_email = True - elif constraint['medium'] == 'msisdn': - require_msisdn = True - else: - logger.warn( - "Unrecognised 3PID medium %s in registrations_require_3pid" % - constraint['medium'] - ) + # FIXME: need a better error than "no auth flow found" for scenarios + # where we required 3PID for registration but the user didn't give one + require_email = 'email' in self.hs.config.registrations_require_3pid + require_msisdn = 'msisdn' in self.hs.config.registrations_require_3pid flows = [] if self.hs.config.enable_registration_captcha: + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([[LoginType.RECAPTCHA]]) - if require_email or not 
require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([[LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA]]) if show_msisdn: - if not require_email or require_msisdn: + # only support the MSISDN-only flow if we don't require email 3PIDs + if not require_email: flows.extend([[LoginType.MSISDN, LoginType.RECAPTCHA]]) + # always let users provide both MSISDN & email flows.extend([ [LoginType.MSISDN, LoginType.EMAIL_IDENTITY, LoginType.RECAPTCHA], ]) else: + # only support 3PIDless registration if no 3PIDs are required if not require_email and not require_msisdn: flows.extend([[LoginType.DUMMY]]) - if require_email or not require_msisdn: + # only support the email-only flow if we don't require MSISDN 3PIDs + if not require_msisdn: flows.extend([[LoginType.EMAIL_IDENTITY]]) if show_msisdn: + # only support the MSISDN-only flow if we don't require email 3PIDs if not require_email or require_msisdn: flows.extend([[LoginType.MSISDN]]) + # always let users provide both MSISDN & email flows.extend([ [LoginType.MSISDN, LoginType.EMAIL_IDENTITY] ]) @@ -359,30 +359,23 @@ class RegisterRestServlet(RestServlet): flows, body, self.hs.get_ip_from_request(request) ) - # doublecheck that we're not trying to register an denied 3pid. - # the user-facing checks should already have happened when we requested - # a 3PID token to validate them in /register/email/requestToken etc + # Check that we're not trying to register a denied 3pid. + # + # the user-facing checks will probably already have happened in + # /register/email/requestToken when we requested a 3pid, but that's not + # guaranteed. - for constraint in self.hs.config.registrations_require_3pid: - if ( - constraint['medium'] == 'email' and - auth_result and LoginType.EMAIL_IDENTITY in auth_result and - re.match( - constraint['pattern'], - auth_result[LoginType.EMAIL_IDENTITY].threepid.address - ) - ): - raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED - ) - elif ( - constraint['medium'] == 'msisdn' and - auth_result and LoginType.MSISDN in auth_result and - re.match( - constraint['pattern'], - auth_result[LoginType.MSISDN].threepid.address - ) - ): + if ( + auth_result and + ( + LoginType.EMAIL_IDENTITY in auth_result or + LoginType.EMAIL_MSISDN in auth_result + ) + ): + medium = auth_result[LoginType.EMAIL_IDENTITY].threepid['medium'] + address = auth_result[LoginType.EMAIL_IDENTITY].threepid['address'] + + if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED ) diff --git a/synapse/util/threepids.py b/synapse/util/threepids.py new file mode 100644 index 0000000000..e921b97796 --- /dev/null +++ b/synapse/util/threepids.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import re + +logger = logging.getLogger(__name__) + + +def check_3pid_allowed(hs, medium, address): + """Checks whether a given format of 3PID is allowed to be used on this HS + + Args: + hs (synapse.server.HomeServer): server + medium (str): 3pid medium - e.g. email, msisdn + address (str): address within that medium (e.g. "wotan@matrix.org") + msisdns need to first have been canonicalised + """ + + if hs.config.allowed_local_3pids: + for constraint in hs.config.allowed_local_3pids: + logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( + address, medium, constraint['pattern'], constraint['medium'] + )) + if ( + medium == constraint['medium'] and + re.match(constraint['pattern'], address) + ): + return True + else: + return True + + return False From 293380bef761b67479a90d2837bbf6dfa5a70a90 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 15:38:53 +0000 Subject: [PATCH 107/348] trailing commas --- synapse/rest/client/v2_alpha/account.py | 8 ++++---- synapse/rest/client/v2_alpha/register.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 514bb37da1..30523995af 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -50,7 +50,7 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -86,7 +86,7 @@ class MsisdnPasswordRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( @@ -230,7 +230,7 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( @@ -273,7 +273,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.datastore.get_user_id_by_threepid( diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index c3479e29de..bf68e34a58 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -73,7 +73,7 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "email", body['email']): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -113,7 +113,7 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): if not check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( - 403, "Third party identifier is not 
allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) existingUid = yield self.hs.get_datastore().get_user_id_by_threepid( @@ -377,7 +377,7 @@ class RegisterRestServlet(RestServlet): if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED + 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, ) if registered_user_id is not None: From 8fe253f19b1c61c38111948cce00a7d260d2925a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 18:23:45 +0000 Subject: [PATCH 108/348] fix PR nitpicking --- synapse/util/threepids.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/util/threepids.py b/synapse/util/threepids.py index e921b97796..75efa0117b 100644 --- a/synapse/util/threepids.py +++ b/synapse/util/threepids.py @@ -27,13 +27,16 @@ def check_3pid_allowed(hs, medium, address): medium (str): 3pid medium - e.g. email, msisdn address (str): address within that medium (e.g. "wotan@matrix.org") msisdns need to first have been canonicalised + Returns: + bool: whether the 3PID medium/address is allowed to be added to this HS """ if hs.config.allowed_local_3pids: for constraint in hs.config.allowed_local_3pids: - logger.debug("Checking 3PID %s (%s) against %s (%s)" % ( - address, medium, constraint['pattern'], constraint['medium'] - )) + logger.debug( + "Checking 3PID %s (%s) against %s (%s)", + address, medium, constraint['pattern'], constraint['medium'], + ) if ( medium == constraint['medium'] and re.match(constraint['pattern'], address) From 62d7d66ae5592175bb35a2a8d2f69b1924d6e1f2 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 18:23:56 +0000 Subject: [PATCH 109/348] oops, check all login types --- synapse/rest/client/v2_alpha/register.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index bf68e34a58..4e73f9a40a 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -365,20 +365,17 @@ class RegisterRestServlet(RestServlet): # /register/email/requestToken when we requested a 3pid, but that's not # guaranteed. 
- if ( - auth_result and - ( - LoginType.EMAIL_IDENTITY in auth_result or - LoginType.EMAIL_MSISDN in auth_result - ) - ): - medium = auth_result[LoginType.EMAIL_IDENTITY].threepid['medium'] - address = auth_result[LoginType.EMAIL_IDENTITY].threepid['address'] + if auth_result: + for login_type in [LoginType.EMAIL_IDENTITY, LoginType.EMAIL_MSISDN]: + if login_type in auth_result: + medium = auth_result[login_type].threepid['medium'] + address = auth_result[login_type].threepid['address'] - if not check_3pid_allowed(self.hs, medium, address): - raise SynapseError( - 403, "Third party identifier is not allowed", Codes.THREEPID_DENIED, - ) + if not check_3pid_allowed(self.hs, medium, address): + raise SynapseError( + 403, "Third party identifier is not allowed", + Codes.THREEPID_DENIED, + ) if registered_user_id is not None: logger.info( From ad7ec63d08c9d814766ea4764e187bc7ba5589d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 18:29:39 +0000 Subject: [PATCH 110/348] Use the right path for url_preview thumbnails This was introduced by #2627: we were overwriting the original media for url previews with the thumbnails :/ (fixes https://github.com/vector-im/riot-web/issues/6012, hopefully) --- synapse/rest/media/v1/media_storage.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 001e84578e..041ae396cd 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -164,6 +164,14 @@ class MediaStorage(object): str """ if file_info.url_cache: + if file_info.thumbnail: + return self.filepaths.url_cache_thumbnail_rel( + media_id=file_info.file_id, + width=file_info.thumbnail_width, + height=file_info.thumbnail_height, + content_type=file_info.thumbnail_type, + method=file_info.thumbnail_method, + ) return self.filepaths.url_cache_filepath_rel(file_info.file_id) if file_info.server_name: From 49fce046243ae3e2c38ac6ac39001172667b8dfa Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 19 Jan 2018 19:55:33 +0000 Subject: [PATCH 111/348] fix typo (thanks sytest) --- synapse/rest/client/v2_alpha/register.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 4e73f9a40a..3abfe35479 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -366,7 +366,7 @@ class RegisterRestServlet(RestServlet): # guaranteed. 
if auth_result: - for login_type in [LoginType.EMAIL_IDENTITY, LoginType.EMAIL_MSISDN]: + for login_type in [LoginType.EMAIL_IDENTITY, LoginType.MSISDN]: if login_type in auth_result: medium = auth_result[login_type].threepid['medium'] address = auth_result[login_type].threepid['address'] From 87b7d727605c8e122adb768b7487dfcae830593f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:51:04 +0000 Subject: [PATCH 112/348] Add some comments about the reactor tick time metric --- synapse/metrics/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 2265e6e8d6..e0cfb7d08f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -146,10 +146,15 @@ def runUntilCurrentTimer(func): num_pending += 1 num_pending += len(reactor.threadCallQueue) - start = time.time() * 1000 ret = func(*args, **kwargs) end = time.time() * 1000 + + # record the amount of wallclock time spent running pending calls. + # This is a proxy for the actual amount of time between reactor polls, + # since about 25% of time is actually spent running things triggered by + # I/O events, but that is harder to capture without rewriting half the + # reactor. tick_time.inc_by(end - start) pending_calls_metric.inc_by(num_pending) From 2c8526cac795fa2aa795e1a1aaae2ffb2558824d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:55:44 +0000 Subject: [PATCH 113/348] Use a connection pool for the SimpleHttpClient In particular I hope this will help the pusher, which makes many requests to sygnal, and is currently negotiating SSL for each one. --- synapse/http/client.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 4abb479ae3..930d713013 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -30,6 +30,7 @@ from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import ( BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, readBody, PartialDownloadError, + HTTPConnectionPool, ) from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.http import PotentialDataLoss @@ -64,13 +65,19 @@ class SimpleHttpClient(object): """ def __init__(self, hs): self.hs = hs + + pool = HTTPConnectionPool(reactor) + pool.maxPersistentPerHost = 5 + pool.cachedConnectionTimeout = 2 * 60 + # The default context factory in Twisted 14.0.0 (which we require) is # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory() + contextFactory=hs.get_http_client_context_factory(), + pool=pool, ) self.user_agent = hs.version_string self.clock = hs.get_clock() From 5552ed9a7fb1300142a7aebe7fc85b0bd2535bcf Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:25:23 -0700 Subject: [PATCH 114/348] Add an admin route to get all the media in a room This is intended to be used by administrators to monitor the media that is passing through their server, if they wish. 
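For illustration only, a minimal sketch of how a server admin might query the new route once this patch is applied. The homeserver URL, room ID and access token are placeholders, the requests library and the /_matrix/client/api/v1 client prefix are assumptions rather than part of this patch, and the response shape is taken from the handler in the diff below:

    import requests
    from urllib.parse import quote

    homeserver = "https://hs.example.com"   # placeholder homeserver URL
    room_id = "!somewhere:example.com"      # placeholder room ID
    access_token = "ADMIN_TOKEN"            # placeholder token for a server admin user

    # GET <prefix>/admin/room/<room_id>/media -- the v1 client prefix is assumed here
    url = "%s/_matrix/client/api/v1/admin/room/%s/media" % (
        homeserver, quote(room_id, safe=""),
    )
    resp = requests.get(url, params={"access_token": access_token})
    resp.raise_for_status()

    # The handler below returns {"local": [...], "remote": [...]} lists of mxc:// URIs
    media = resp.json()
    print("local media:", media["local"])
    print("remote media:", media["remote"])
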
Signed-off-by: Travis Ralston --- synapse/rest/client/v1/admin.py | 22 ++++++ synapse/storage/room.py | 133 ++++++++++++++++++-------------- 2 files changed, 98 insertions(+), 57 deletions(-) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea9..0615e5d807 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -289,6 +289,27 @@ class QuarantineMediaInRoom(ClientV1RestServlet): defer.returnValue((200, {"num_quarantined": num_quarantined})) +class ListMediaInRoom(ClientV1RestServlet): + """Lists all of the media in a given room. + """ + PATTERNS = client_path_patterns("/admin/room/(?P[^/]+)/media") + + def __init__(self, hs): + super(ListMediaInRoom, self).__init__(hs) + self.store = hs.get_datastore() + + @defer.inlineCallbacks + def on_GET(self, request, room_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + if not is_admin: + raise AuthError(403, "You are not a server admin") + + local_mxcs, remote_mxcs = yield self.store.get_media_mxcs_in_room(room_id) + + defer.returnValue((200, {"local": local_mxcs, "remote": remote_mxcs})) + + class ResetPasswordRestServlet(ClientV1RestServlet): """Post request to allow an administrator reset password for a user. This needs user to have administrator access in Synapse. @@ -487,3 +508,4 @@ def register_servlets(hs, http_server): SearchUsersRestServlet(hs).register(http_server) ShutdownRoomRestServlet(hs).register(http_server) QuarantineMediaInRoom(hs).register(http_server) + ListMediaInRoom(hs).register(http_server) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..cd6899a4b5 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -533,73 +533,92 @@ class RoomStore(SQLBaseStore): ) self.is_room_blocked.invalidate((room_id,)) + def get_media_mxcs_in_room(self, room_id): + def _get_media_ids_in_room(txn): + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + local_media_mxcs = [] + remote_media_mxcs = [] + + # Convert the IDs to MXC URIs + for media_id in local_media_ids: + local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) + for hostname, media_id in remote_media_ids: + remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs + return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _get_media_ids_in_room(txn): - mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") - - next_token = self.get_current_events_token() + 1 - + def _quarantine_media_in_room(txn): + local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 - while next_token: - sql = """ - SELECT stream_ordering, content FROM events - WHERE room_id = ? - AND stream_ordering < ? - AND contains_url = ? AND outlier = ? - ORDER BY stream_ordering DESC - LIMIT ? + # Now update all the tables to set the quarantined_by flag + + txn.executemany(""" + UPDATE local_media_repository + SET quarantined_by = ? + WHERE media_id = ? 
+ """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + + txn.executemany( """ - txn.execute(sql, (room_id, next_token, True, False, 100)) - - next_token = None - local_media_mxcs = [] - remote_media_mxcs = [] - for stream_ordering, content_json in txn: - next_token = stream_ordering - content = json.loads(content_json) - - content_url = content.get("url") - thumbnail_url = content.get("info", {}).get("thumbnail_url") - - for url in (content_url, thumbnail_url): - if not url: - continue - matches = mxc_re.match(url) - if matches: - hostname = matches.group(1) - media_id = matches.group(2) - if hostname == self.hostname: - local_media_mxcs.append(media_id) - else: - remote_media_mxcs.append((hostname, media_id)) - - # Now update all the tables to set the quarantined_by flag - - txn.executemany(""" - UPDATE local_media_repository + UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) - - txn.executemany( - """ - UPDATE remote_media_cache - SET quarantined_by = ? - WHERE media_origin AND media_id = ? - """, - ( - (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs - ) + WHERE media_origin AND media_id = ? + """, + ( + (quarantined_by, origin, media_id) + for origin, media_id in remote_media_mxcs ) + ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_mxcs) + total_media_quarantined += len(remote_media_mxcs) return total_media_quarantined - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + + def _get_media_ids_in_room(self, txn, room_id): + mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") + + next_token = self.get_current_events_token() + 1 + local_media_mxcs = [] + remote_media_mxcs = [] + + while next_token: + sql = """ + SELECT stream_ordering, content FROM events + WHERE room_id = ? + AND stream_ordering < ? + AND contains_url = ? AND outlier = ? + ORDER BY stream_ordering DESC + LIMIT ? + """ + txn.execute(sql, (room_id, next_token, True, False, 100)) + + next_token = None + for stream_ordering, content_json in txn: + next_token = stream_ordering + content = json.loads(content_json) + + content_url = content.get("url") + thumbnail_url = content.get("info", {}).get("thumbnail_url") + + for url in (content_url, thumbnail_url): + if not url: + continue + matches = mxc_re.match(url) + if matches: + hostname = matches.group(1) + media_id = matches.group(2) + if hostname == self.hostname: + local_media_mxcs.append(media_id) + else: + remote_media_mxcs.append((hostname, media_id)) + + return local_media_mxcs, remote_media_mxcs From a94d9b6b825c6b2db375460268567e637e10709a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sat, 20 Jan 2018 22:49:46 -0700 Subject: [PATCH 115/348] Appease the linter These are ids anyways, not mxc uris. 
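To make the id/mxc distinction concrete, a small standalone sketch of the conversion the room store performs; the regex is copied from _get_media_ids_in_room above, and the example values are made up:

    import re

    # Same pattern as _get_media_ids_in_room in synapse/storage/room.py
    MXC_RE = re.compile("^mxc://([^/]+)/([^/#?]+)")

    def split_mxc(url):
        """Return (hostname, media_id) for an mxc:// URI, or None if it isn't one."""
        m = MXC_RE.match(url)
        return (m.group(1), m.group(2)) if m else None

    def to_mxc(hostname, media_id):
        """Rebuild the mxc:// URI from its parts, as get_media_mxcs_in_room does."""
        return "mxc://%s/%s" % (hostname, media_id)

    # made-up example values
    assert split_mxc("mxc://example.com/abc123") == ("example.com", "abc123")
    assert to_mxc("example.com", "abc123") == "mxc://example.com/abc123"
    assert split_mxc("https://example.com/not-mxc") is None
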
Signed-off-by: Travis Ralston --- synapse/storage/room.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cd6899a4b5..d1d63f4041 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -553,7 +553,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room(txn): - local_media_mxcs, remote_media_mxcs = self._get_media_ids_in_room(txn, room_id) + local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_mxcs)) + """, ((quarantined_by, media_id) for media_id in local_media_ids)) txn.executemany( """ @@ -572,12 +572,12 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_mxcs + for origin, media_id in remote_media_ids ) ) - total_media_quarantined += len(local_media_mxcs) - total_media_quarantined += len(remote_media_mxcs) + total_media_quarantined += len(local_media_ids) + total_media_quarantined += len(remote_media_ids) return total_media_quarantined From 5c431f421c0edcdc4582da7e0d780355ebf647e1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 16:45:43 +0000 Subject: [PATCH 116/348] Matthew's fixes to the unit tests Extracted from https://github.com/matrix-org/synapse/pull/2820 --- tests/replication/slave/storage/_base.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 81063f19a1..74f104e3b8 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -15,6 +15,8 @@ from twisted.internet import defer, reactor from tests import unittest +import tempfile + from mock import Mock, NonCallableMock from tests.utils import setup_test_homeserver from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory @@ -41,7 +43,9 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.event_id = 0 server_factory = ReplicationStreamProtocolFactory(self.hs) - listener = reactor.listenUNIX("\0xxx", server_factory) + # XXX: mktemp is unsafe and should never be used. but we're just a test. + path = tempfile.mktemp(prefix="base_slaved_store_test_case_socket") + listener = reactor.listenUNIX(path, server_factory) self.addCleanup(listener.stopListening) self.streamer = server_factory.streamer @@ -49,7 +53,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): client_factory = ReplicationClientFactory( self.hs, "client_name", self.replication_handler ) - client_connector = reactor.connectUNIX("\0xxx", client_factory) + client_connector = reactor.connectUNIX(path, client_factory) self.addCleanup(client_factory.stopTrying) self.addCleanup(client_connector.disconnect) From ab9f844aaf3662a64dbc4c56077e9fa37bc7d5d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 22 Jan 2018 19:11:18 +0100 Subject: [PATCH 117/348] Add federation_domain_whitelist option (#2820) Add federation_domain_whitelist gives a way to restrict which domains your HS is allowed to federate with. 
useful mainly for gracefully preventing a private but internet-connected HS from trying to federate to the wider public Matrix network --- synapse/api/errors.py | 26 ++++++++++++++++++++ synapse/config/server.py | 22 +++++++++++++++++ synapse/federation/federation_client.py | 5 +++- synapse/federation/transaction_queue.py | 4 +++- synapse/federation/transport/client.py | 3 +++ synapse/federation/transport/server.py | 9 ++++++- synapse/handlers/device.py | 4 ++++ synapse/handlers/e2e_keys.py | 8 ++++++- synapse/handlers/federation.py | 4 ++++ synapse/http/matrixfederationclient.py | 28 +++++++++++++++++++++- synapse/rest/key/v2/remote_key_resource.py | 8 +++++++ synapse/rest/media/v1/media_repository.py | 19 +++++++++++++-- synapse/util/retryutils.py | 12 ++++++++++ tests/utils.py | 1 + 14 files changed, 146 insertions(+), 7 deletions(-) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 46b0d7b34c..aa15f73f36 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -141,6 +141,32 @@ class RegistrationError(SynapseError): pass +class FederationDeniedError(SynapseError): + """An error raised when the server tries to federate with a server which + is not on its federation whitelist. + + Attributes: + destination (str): The destination which has been denied + """ + + def __init__(self, destination): + """Raised by federation client or server to indicate that we are + are deliberately not attempting to contact a given server because it is + not on our federation whitelist. + + Args: + destination (str): the domain in question + """ + + self.destination = destination + + super(FederationDeniedError, self).__init__( + code=403, + msg="Federation denied with %s." % (self.destination,), + errcode=Codes.FORBIDDEN, + ) + + class InteractiveAuthIncompleteError(Exception): """An error raised when UI auth is not yet complete diff --git a/synapse/config/server.py b/synapse/config/server.py index 436dd8a6fe..8f0b6d1f28 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -55,6 +55,17 @@ class ServerConfig(Config): "block_non_admin_invites", False, ) + # FIXME: federation_domain_whitelist needs sytests + self.federation_domain_whitelist = None + federation_domain_whitelist = config.get( + "federation_domain_whitelist", None + ) + # turn the whitelist into a hash for speed of lookup + if federation_domain_whitelist is not None: + self.federation_domain_whitelist = {} + for domain in federation_domain_whitelist: + self.federation_domain_whitelist[domain] = True + if self.public_baseurl is not None: if self.public_baseurl[-1] != '/': self.public_baseurl += '/' @@ -210,6 +221,17 @@ class ServerConfig(Config): # (except those sent by local server admins). The default is False. # block_non_admin_invites: True + # Restrict federation to the following whitelist of domains. + # N.B. we recommend also firewalling your federation listener to limit + # inbound federation traffic as early as possible, rather than relying + # purely on this application-layer restriction. If not specified, the + # default is to whitelist everything. + # + # federation_domain_whitelist: + # - lon.example.com + # - nyc.example.com + # - syd.example.com + # List of ports that Synapse should listen on, their purpose and their # configuration. 
listeners: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b1fe03f702..813907f7f2 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from synapse.api.errors import ( - CodeMessageException, HttpResponseException, SynapseError, + CodeMessageException, HttpResponseException, SynapseError, FederationDeniedError ) from synapse.events import builder from synapse.federation.federation_base import ( @@ -266,6 +266,9 @@ class FederationClient(FederationBase): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e.message) + continue except Exception as e: pdu_attempts[destination] = now diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 9d39f46583..a141ec9953 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -19,7 +19,7 @@ from twisted.internet import defer from .persistence import TransactionActions from .units import Transaction, Edu -from synapse.api.errors import HttpResponseException +from synapse.api.errors import HttpResponseException, FederationDeniedError from synapse.util import logcontext, PreserveLoggingContext from synapse.util.async import run_on_reactor from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter @@ -490,6 +490,8 @@ class TransactionQueue(object): (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) + except FederationDeniedError as e: + logger.info(e) except Exception as e: logger.warn( "TX [%s] Failed to send transaction: %s", diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 1f3ce238f6..5488e82985 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -212,6 +212,9 @@ class TransportLayerClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. 
+ + Fails with ``FederationDeniedError`` if the remote destination + is not in our federation whitelist """ valid_memberships = {Membership.JOIN, Membership.LEAVE} if membership not in valid_memberships: diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2b02b021ec..06c16ba4fa 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -16,7 +16,7 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, SynapseError, FederationDeniedError from synapse.http.server import JsonResource from synapse.http.servlet import ( parse_json_object_from_request, parse_integer_from_args, parse_string_from_args, @@ -81,6 +81,7 @@ class Authenticator(object): self.keyring = hs.get_keyring() self.server_name = hs.hostname self.store = hs.get_datastore() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist # A method just so we can pass 'self' as the authenticator to the Servlets @defer.inlineCallbacks @@ -92,6 +93,12 @@ class Authenticator(object): "signatures": {}, } + if ( + self.federation_domain_whitelist is not None and + self.server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(self.server_name) + if content is not None: json_request["content"] = content diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 2152efc692..0e83453851 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.api import errors from synapse.api.constants import EventTypes +from synapse.api.errors import FederationDeniedError from synapse.util import stringutils from synapse.util.async import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -513,6 +514,9 @@ class DeviceListEduUpdater(object): # This makes it more likely that the device lists will # eventually become consistent. 
return + except FederationDeniedError as e: + logger.info(e) + return except Exception: # TODO: Remember that we are now out of sync and try again # later diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 5af8abf66b..9aa95f89e6 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -19,7 +19,9 @@ import logging from canonicaljson import encode_canonical_json from twisted.internet import defer -from synapse.api.errors import SynapseError, CodeMessageException +from synapse.api.errors import ( + SynapseError, CodeMessageException, FederationDeniedError, +) from synapse.types import get_domain_from_id, UserID from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.retryutils import NotRetryingDestination @@ -140,6 +142,10 @@ class E2eKeysHandler(object): failures[destination] = { "status": 503, "message": "Not ready for retry", } + except FederationDeniedError as e: + failures[destination] = { + "status": 403, "message": "Federation Denied", + } except Exception as e: # include ConnectionRefused and other errors failures[destination] = { diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ac70730885..677532c87b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -22,6 +22,7 @@ from ._base import BaseHandler from synapse.api.errors import ( AuthError, FederationError, StoreError, CodeMessageException, SynapseError, + FederationDeniedError, ) from synapse.api.constants import EventTypes, Membership, RejectedReason from synapse.events.validator import EventValidator @@ -782,6 +783,9 @@ class FederationHandler(BaseHandler): except NotRetryingDestination as e: logger.info(e.message) continue + except FederationDeniedError as e: + logger.info(e) + continue except Exception as e: logger.exception( "Failed to backfill from %s because %s", diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 833496b72d..9145405cb0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -27,7 +27,7 @@ import synapse.metrics from canonicaljson import encode_canonical_json from synapse.api.errors import ( - SynapseError, Codes, HttpResponseException, + SynapseError, Codes, HttpResponseException, FederationDeniedError, ) from signedjson.sign import sign_json @@ -123,11 +123,22 @@ class MatrixFederationHttpClient(object): Fails with ``HTTPRequestException``: if we get an HTTP response code >= 300. + Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist + (May also fail with plenty of other Exceptions for things like DNS failures, connection failures, SSL failures.) """ + if ( + self.hs.config.federation_domain_whitelist and + destination not in self.hs.config.federation_domain_whitelist + ): + raise FederationDeniedError(destination) + limiter = yield synapse.util.retryutils.get_retry_limiter( destination, self.clock, @@ -308,6 +319,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ if not json_data_callback: @@ -368,6 +382,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. 
+ + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ def body_callback(method, url_bytes, headers_dict): @@ -422,6 +439,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ logger.debug("get_json args: %s", args) @@ -475,6 +495,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ response = yield self._request( @@ -518,6 +541,9 @@ class MatrixFederationHttpClient(object): Fails with ``NotRetryingDestination`` if we are not yet ready to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ encoded_args = {} diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index cc2842aa72..17e6079cba 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -93,6 +93,7 @@ class RemoteKey(Resource): self.store = hs.get_datastore() self.version_string = hs.version_string self.clock = hs.get_clock() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist def render_GET(self, request): self.async_render_GET(request) @@ -137,6 +138,13 @@ class RemoteKey(Resource): logger.info("Handling query for keys %r", query) store_queries = [] for server_name, key_ids in query.items(): + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + logger.debug("Federation denied with %s", server_name) + continue + if not key_ids: key_ids = (None,) for key_id in key_ids: diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 4f56bcf577..485db8577a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -32,8 +32,9 @@ from .media_storage import MediaStorage from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.util.stringutils import random_string -from synapse.api.errors import SynapseError, HttpResponseException, \ - NotFoundError +from synapse.api.errors import ( + SynapseError, HttpResponseException, NotFoundError, FederationDeniedError, +) from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii @@ -75,6 +76,8 @@ class MediaRepository(object): self.recently_accessed_remotes = set() self.recently_accessed_locals = set() + self.federation_domain_whitelist = hs.config.federation_domain_whitelist + # List of StorageProviders where we should search for media and # potentially upload to. 
storage_providers = [] @@ -216,6 +219,12 @@ class MediaRepository(object): Deferred: Resolves once a response has successfully been written to request """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote @@ -250,6 +259,12 @@ class MediaRepository(object): Returns: Deferred[dict]: The media_info of the file """ + if ( + self.federation_domain_whitelist is not None and + server_name not in self.federation_domain_whitelist + ): + raise FederationDeniedError(server_name) + # We linearize here to ensure that we don't try and download remote # media multiple times concurrently key = (server_name, media_id) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 1adedbb361..47b0bb5eb3 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -26,6 +26,18 @@ logger = logging.getLogger(__name__) class NotRetryingDestination(Exception): def __init__(self, retry_last_ts, retry_interval, destination): + """Raised by the limiter (and federation client) to indicate that we are + are deliberately not attempting to contact a given server. + + Args: + retry_last_ts (int): the unix ts in milliseconds of our last attempt + to contact the server. 0 indicates that the last attempt was + successful or that we've never actually attempted to connect. + retry_interval (int): the time in milliseconds to wait until the next + attempt. + destination (str): the domain in question + """ + msg = "Not retrying server %s." % (destination,) super(NotRetryingDestination, self).__init__(msg) diff --git a/tests/utils.py b/tests/utils.py index 44e5f75093..3116047892 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -57,6 +57,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.worker_app = None config.email_enable_notifs = False config.block_non_admin_invites = False + config.federation_domain_whitelist = None # disable user directory updates, because they get done in the # background, which upsets the test runner. From 93efd7eb04601ee103f176819283e0298c660adc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 18:14:10 +0000 Subject: [PATCH 118/348] logging and debug for http pusher --- synapse/push/httppusher.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index c16f61452c..f2517f39a8 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -13,21 +13,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -from synapse.push import PusherConfigException +import logging from twisted.internet import defer, reactor from twisted.internet.error import AlreadyCalled, AlreadyCancelled -import logging import push_rule_evaluator import push_tools - +import synapse +from synapse.push import PusherConfigException from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for(__name__) + +http_push_processed_counter = metrics.register_counter( + "http_pushes_processed", +) + +http_push_failed_counter = metrics.register_counter( + "http_pushes_failed", +) + class HttpPusher(object): INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes @@ -152,9 +161,15 @@ class HttpPusher(object): self.user_id, self.last_stream_ordering, self.max_stream_ordering ) + logger.info( + "Processing %i unprocessed push actions starting at stream_ordering %i", + len(unprocessed), self.last_stream_ordering, + ) + for push_action in unprocessed: processed = yield self._process_one(push_action) if processed: + http_push_processed_counter.inc() self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.last_stream_ordering = push_action['stream_ordering'] yield self.store.update_pusher_last_stream_ordering_and_success( @@ -169,6 +184,7 @@ class HttpPusher(object): self.failing_since ) else: + http_push_failed_counter.inc() if not self.failing_since: self.failing_since = self.clock.time_msec() yield self.store.update_pusher_failing_since( From 4528dd2443f4dc9e737bf4eeccedfb8807a1ea2c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Jan 2018 20:15:42 +0000 Subject: [PATCH 119/348] Fix logging and add user_id --- synapse/push/httppusher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index f2517f39a8..4a03af5b21 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -162,8 +162,9 @@ class HttpPusher(object): ) logger.info( - "Processing %i unprocessed push actions starting at stream_ordering %i", - len(unprocessed), self.last_stream_ordering, + "Processing %i unprocessed push actions for %s starting at " + "stream_ordering %s", + len(unprocessed), self.user_id, self.last_stream_ordering, ) for push_action in unprocessed: From d32385336f2edf2c98ae9eb560bad9402860d7cc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 23 Jan 2018 09:59:06 +0100 Subject: [PATCH 120/348] add ?ts massaging for ASes (#2754) blindly implement ?ts for AS. 
untested --- synapse/rest/client/v1/room.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 682a0af9fc..867ec8602c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -195,15 +195,20 @@ class RoomSendEventRestServlet(ClientV1RestServlet): requester = yield self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) + event_dict = { + "type": event_type, + "content": content, + "room_id": room_id, + "sender": requester.user.to_string(), + } + + if 'ts' in request.args and requester.app_service: + event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) + msg_handler = self.handlers.message_handler event = yield msg_handler.create_and_send_nonmember_event( requester, - { - "type": event_type, - "content": content, - "room_id": room_id, - "sender": requester.user.to_string(), - }, + event_dict, txn_id=txn_id, ) From 9a72b70630e111639243c6ab7867c4d4b970e2df Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 24 Jan 2018 11:07:24 +0100 Subject: [PATCH 121/348] fix thinko on 3pid whitelisting --- synapse/rest/client/v2_alpha/register.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 3abfe35479..c6f4680a76 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -368,8 +368,8 @@ class RegisterRestServlet(RestServlet): if auth_result: for login_type in [LoginType.EMAIL_IDENTITY, LoginType.MSISDN]: if login_type in auth_result: - medium = auth_result[login_type].threepid['medium'] - address = auth_result[login_type].threepid['address'] + medium = auth_result[login_type]['medium'] + address = auth_result[login_type]['address'] if not check_3pid_allowed(self.hs, medium, address): raise SynapseError( From 349c7399663b5fce856995a3a901019f5d210cc4 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:28:44 +0000 Subject: [PATCH 122/348] synapse 500s on a call to publicRooms in the case where the number of public rooms is zero, the specific cause is due to xrange trying to use a step value of zero, but if the total room number really is zero then it makes sense to just bail and save the extra processing --- synapse/handlers/room_list.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index bb40075387..ae5db4d2c5 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,6 +186,11 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) + #bail if no rooms to work on + if len(rooms_to_scan) == 0: + defer.returnValue([]) + + # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # From d02e43b15f6b9b24ffe5e0c0d696f8fd71fc8af3 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:29:46 +0000 Subject: [PATCH 123/348] remove white space --- synapse/handlers/room_list.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index ae5db4d2c5..9f8173644a 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -190,7 +190,6 @@ class RoomListHandler(BaseHandler): if len(rooms_to_scan) == 0: defer.returnValue([]) - # _append_room_entry_to_chunk will append to 
chunk but will stop if # len(chunk) > limit # From 6c6e197b0a100f14fa69d8decba59e58c7c25b6c Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:47:46 +0000 Subject: [PATCH 124/348] fix PEP8 violation --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 9f8173644a..f466a64ed9 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,7 +186,7 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - #bail if no rooms to work on + # bail if no rooms to work on if len(rooms_to_scan) == 0: defer.returnValue([]) From f6320835764dbb6cac058763737d67ca6359e3a9 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 25 Jan 2018 23:52:17 +0000 Subject: [PATCH 125/348] fix return type, should be a dict --- synapse/handlers/room_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f466a64ed9..2ee63548ca 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -188,7 +188,7 @@ class RoomListHandler(BaseHandler): # bail if no rooms to work on if len(rooms_to_scan) == 0: - defer.returnValue([]) + defer.returnValue({}) # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit From 86c4f49a31fe044a727c64e40009596050cdab95 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:12:02 +0000 Subject: [PATCH 126/348] rather than try reconstruct the results object, better to guard against the xrange step argument being 0 --- synapse/handlers/room_list.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 2ee63548ca..cf62ead816 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -186,10 +186,6 @@ class RoomListHandler(BaseHandler): logger.info("After sorting and filtering, %i rooms remain", len(rooms_to_scan)) - # bail if no rooms to work on - if len(rooms_to_scan) == 0: - defer.returnValue({}) - # _append_room_entry_to_chunk will append to chunk but will stop if # len(chunk) > limit # @@ -207,8 +203,8 @@ class RoomListHandler(BaseHandler): if limit: step = limit + 1 else: - step = len(rooms_to_scan) - + # step cannot be zero + step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 73560237d646835197e07e9e6c50674786a79a28 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 26 Jan 2018 00:15:10 +0000 Subject: [PATCH 127/348] add white space line --- synapse/handlers/room_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index cf62ead816..dfa09141ed 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -205,6 +205,7 @@ class RoomListHandler(BaseHandler): else: # step cannot be zero step = len(rooms_to_scan) if len(rooms_to_scan) != 0 else 1 + chunk = [] for i in xrange(0, len(rooms_to_scan), step): batch = rooms_to_scan[i:i + step] From 2b918464979c958447e5405c26b0832adbb8913a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:12:31 +0000 Subject: [PATCH 128/348] Remove spurious unittest.DEBUG --- tests/handlers/test_e2e_keys.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 19f5ed6bce..d92bf240b1 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -143,7 +143,6 @@ class E2eKeysHandlerTestCase(unittest.TestCase): except errors.SynapseError: pass - @unittest.DEBUG @defer.inlineCallbacks def test_claim_one_time_key(self): local_user = "@boris:" + self.hs.hostname From 6e9bf67f189c09a65eea4e348c5b9ac265ea096f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:35:21 +0000 Subject: [PATCH 129/348] Remove unused/bitrotted MemoryDataStore This isn't used, and looks thoroughly bitrotted. --- tests/utils.py | 164 ++++--------------------------------------------- 1 file changed, 13 insertions(+), 151 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 3116047892..de33deb0b2 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,27 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import HttpServer -from synapse.api.errors import cs_error, CodeMessageException, StoreError -from synapse.api.constants import EventTypes -from synapse.storage.prepare_database import prepare_database -from synapse.storage.engines import create_engine -from synapse.server import HomeServer -from synapse.federation.transport import server -from synapse.util.ratelimitutils import FederationRateLimiter - -from synapse.util.logcontext import LoggingContext - -from twisted.internet import defer, reactor -from twisted.enterprise.adbapi import ConnectionPool - -from collections import namedtuple -from mock import patch, Mock import hashlib +from inspect import getcallargs import urllib import urlparse -from inspect import getcallargs +from mock import Mock, patch +from twisted.enterprise.adbapi import ConnectionPool +from twisted.internet import defer, reactor + +from synapse.api.errors import CodeMessageException, cs_error +from synapse.federation.transport import server +from synapse.http.server import HttpServer +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.storage.prepare_database import prepare_database +from synapse.util.logcontext import LoggingContext +from synapse.util.ratelimitutils import FederationRateLimiter @defer.inlineCallbacks @@ -334,140 +330,6 @@ class SQLiteMemoryDbPool(ConnectionPool, object): return create_engine(self.config.database_config) -class MemoryDataStore(object): - - Room = namedtuple( - "Room", - ["room_id", "is_public", "creator"] - ) - - def __init__(self): - self.tokens_to_users = {} - self.paths_to_content = {} - - self.members = {} - self.rooms = {} - - self.current_state = {} - self.events = [] - - class Snapshot(namedtuple("Snapshot", "room_id user_id membership_state")): - def fill_out_prev_events(self, event): - pass - - def snapshot_room(self, room_id, user_id, state_type=None, state_key=None): - return self.Snapshot( - room_id, user_id, self.get_room_member(user_id, room_id) - ) - - def register(self, user_id, token, password_hash): - if user_id in self.tokens_to_users.values(): - raise StoreError(400, "User in use.") - self.tokens_to_users[token] = user_id - - def get_user_by_access_token(self, token): - try: - return { - "name": self.tokens_to_users[token], - } - except Exception: - raise StoreError(400, "User does not exist.") - - def get_room(self, room_id): - try: - return self.rooms[room_id] - except Exception: - return None - - def store_room(self, room_id, 
room_creator_user_id, is_public): - if room_id in self.rooms: - raise StoreError(409, "Conflicting room!") - - room = MemoryDataStore.Room( - room_id=room_id, - is_public=is_public, - creator=room_creator_user_id - ) - self.rooms[room_id] = room - - def get_room_member(self, user_id, room_id): - return self.members.get(room_id, {}).get(user_id) - - def get_room_members(self, room_id, membership=None): - if membership: - return [ - v for k, v in self.members.get(room_id, {}).items() - if v.membership == membership - ] - else: - return self.members.get(room_id, {}).values() - - def get_rooms_for_user_where_membership_is(self, user_id, membership_list): - return [ - m[user_id] for m in self.members.values() - if user_id in m and m[user_id].membership in membership_list - ] - - def get_room_events_stream(self, user_id=None, from_key=None, to_key=None, - limit=0, with_feedback=False): - return ([], from_key) # TODO - - def get_joined_hosts_for_room(self, room_id): - return defer.succeed([]) - - def persist_event(self, event): - if event.type == EventTypes.Member: - room_id = event.room_id - user = event.state_key - self.members.setdefault(room_id, {})[user] = event - - if hasattr(event, "state_key"): - key = (event.room_id, event.type, event.state_key) - self.current_state[key] = event - - self.events.append(event) - - def get_current_state(self, room_id, event_type=None, state_key=""): - if event_type: - key = (room_id, event_type, state_key) - if self.current_state.get(key): - return [self.current_state.get(key)] - return None - else: - return [ - e for e in self.current_state - if e[0] == room_id - ] - - def set_presence_state(self, user_localpart, state): - return defer.succeed({"state": 0}) - - def get_presence_list(self, user_localpart, accepted): - return [] - - def get_room_events_max_id(self): - return "s0" # TODO (erikj) - - def get_send_event_level(self, room_id): - return defer.succeed(0) - - def get_power_level(self, room_id, user_id): - return defer.succeed(0) - - def get_add_state_level(self, room_id): - return defer.succeed(0) - - def get_room_join_rule(self, room_id): - # TODO (erikj): This should be configurable - return defer.succeed("invite") - - def get_ops_levels(self, room_id): - return defer.succeed((5, 5, 5)) - - def insert_client_ip(self, user, access_token, ip, user_agent): - return defer.succeed(None) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 25adde9a04b82a2261404f9b6f9a25cec086f62f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 22:01:02 +0000 Subject: [PATCH 130/348] Factor out get_db_conn to HomeServer base class This function is identical to all subclasses, so we may as well push it up to the base class to reduce duplication (and make use of it in the tests) --- synapse/app/appservice.py | 13 ------------- synapse/app/client_reader.py | 13 ------------- synapse/app/federation_reader.py | 13 ------------- synapse/app/federation_sender.py | 13 ------------- synapse/app/frontend_proxy.py | 13 ------------- synapse/app/homeserver.py | 13 ------------- synapse/app/media_repository.py | 13 ------------- synapse/app/pusher.py | 13 ------------- synapse/app/synchrotron.py | 13 ------------- synapse/app/user_dir.py | 13 ------------- synapse/server.py | 17 +++++++++++++++++ 11 files changed, 17 insertions(+), 130 deletions(-) diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 7d0c2879ae..c6fe4516d1 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -49,19 
+49,6 @@ class AppserviceSlaveStore( class AppserviceServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = AppserviceSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index dc3f6efd43..3b3352798d 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -64,19 +64,6 @@ class ClientReaderSlavedStore( class ClientReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = ClientReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index a072291e1f..4de43c41f0 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -58,19 +58,6 @@ class FederationReaderSlavedStore( class FederationReaderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationReaderSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 09e9488f06..f760826d27 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -76,19 +76,6 @@ class FederationSenderSlaveStore( class FederationSenderServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FederationSenderSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index ae531c0aa4..e32ee8fe93 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -118,19 +118,6 @@ class FrontendProxySlavedStore( class FrontendProxyServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = FrontendProxySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 92ab3b311b..cb82a415a6 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -266,19 +266,6 @@ class SynapseHomeServer(HomeServer): except IncorrectDatabaseSetup as e: quit_with_error(e.message) - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(config_options): """ diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index eab1597aaa..1ed1ca8772 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -60,19 +60,6 @@ class MediaRepositorySlavedStore( class MediaRepositoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = MediaRepositorySlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 7fbbb0b0e1..32ccea3f13 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -81,19 +81,6 @@ class PusherSlaveStore( class PusherServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = PusherSlaveStore(self.get_db_conn(), self) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 0abba3016e..f87531f1b6 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -246,19 +246,6 @@ class SynchrotronApplicationService(object): class SynchrotronServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. 
- db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = SynchrotronSlavedStore(self.get_db_conn(), self) diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index a48c4a2ae6..494ccb702c 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -92,19 +92,6 @@ class UserDirectorySlaveStore( class UserDirectoryServer(HomeServer): - def get_db_conn(self, run_new_connection=True): - # Any param beginning with cp_ is a parameter for adbapi, and should - # not be passed to the database engine. - db_params = { - k: v for k, v in self.db_config.get("args", {}).items() - if not k.startswith("cp_") - } - db_conn = self.database_engine.module.connect(**db_params) - - if run_new_connection: - self.database_engine.on_new_connection(db_conn) - return db_conn - def setup(self): logger.info("Setting up.") self.datastore = UserDirectorySlaveStore(self.get_db_conn(), self) diff --git a/synapse/server.py b/synapse/server.py index 99693071b6..ff8a8fbc46 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -307,6 +307,23 @@ class HomeServer(object): **self.db_config.get("args", {}) ) + def get_db_conn(self, run_new_connection=True): + """Makes a new connection to the database, skipping the db pool + + Returns: + Connection: a connection object implementing the PEP-249 spec + """ + # Any param beginning with cp_ is a parameter for adbapi, and should + # not be passed to the database engine. + db_params = { + k: v for k, v in self.db_config.get("args", {}).items() + if not k.startswith("cp_") + } + db_conn = self.database_engine.module.connect(**db_params) + if run_new_connection: + self.database_engine.on_new_connection(db_conn) + return db_conn + def build_media_repository_resource(self): # build the media repo resource. This indirects through the HomeServer # to ensure that we only have a single instance of From b178eca2616d8210a212fbd04be253158fc7fc47 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:12:46 +0000 Subject: [PATCH 131/348] Run on_new_connection for unit tests Configure the connectionpool used for unit tests to run the `on_new_connection` function. --- tests/utils.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index de33deb0b2..ab5e2341c9 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -66,13 +66,19 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() + db_engine = create_engine(config.database_config) if datastore is None: - db_pool = SQLiteMemoryDbPool() + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). 
+ db_pool = SQLiteMemoryDbPool( + cp_openfun=db_engine.on_new_connection, + ) yield db_pool.prepare() hs = HomeServer( name, db_pool=db_pool, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), @@ -83,7 +89,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): hs = HomeServer( name, db_pool=None, datastore=datastore, config=config, version_string="Synapse/tests", - database_engine=create_engine(config.database_config), + database_engine=db_engine, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs @@ -303,11 +309,15 @@ class MockClock(object): class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self): + def __init__(self, **kwargs): + connkw = { + "cp_min": 1, + "cp_max": 1, + } + connkw.update(kwargs) + super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", - cp_min=1, - cp_max=1, + "sqlite3", ":memory:", **connkw ) self.config = Mock() From d7eacc4f87ca8353f612171fa7e070c14e3b35ee Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:14:24 +0000 Subject: [PATCH 132/348] Create dbpool as normal in tests ... instead of creating our own special SQLiteMemoryDbPool, whose purpose was a bit of a mystery. For some reason this makes one of the tests run slightly slower, so bump the sleep(). Sorry. --- tests/crypto/test_keyring.py | 4 +-- tests/utils.py | 60 ++++++++++-------------------------- 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index c899fecf5d..d4ec02ffc2 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -167,7 +167,7 @@ class KeyringTestCase(unittest.TestCase): # wait a tick for it to send the request to the perspectives server # (it first tries the datastore) - yield async.sleep(0.005) + yield async.sleep(1) # XXX find out why this takes so long! self.http_client.post_json.assert_called_once() self.assertIs(LoggingContext.current_context(), context_11) @@ -183,7 +183,7 @@ class KeyringTestCase(unittest.TestCase): res_deferreds_2 = kr.verify_json_objects_for_server( [("server10", json1)], ) - yield async.sleep(0.005) + yield async.sleep(01) self.http_client.post_json.assert_not_called() res_deferreds_2[0].addBoth(self.check_context, None) diff --git a/tests/utils.py b/tests/utils.py index ab5e2341c9..50de4199be 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -19,7 +19,6 @@ import urllib import urlparse from mock import Mock, patch -from twisted.enterprise.adbapi import ConnectionPool from twisted.internet import defer, reactor from synapse.api.errors import CodeMessageException, cs_error @@ -60,30 +59,37 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.update_user_directory = False config.use_frozen_dicts = True - config.database_config = {"name": "sqlite3"} config.ldap_enabled = False if "clock" not in kargs: kargs["clock"] = MockClock() + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } db_engine = create_engine(config.database_config) + + # we need to configure the connection pool to run the on_new_connection + # function, so that we can test code that uses custom sqlite functions + # (like rank). 
+ config.database_config["args"]["cp_openfun"] = db_engine.on_new_connection + if datastore is None: - # we need to configure the connection pool to run the on_new_connection - # function, so that we can test code that uses custom sqlite functions - # (like rank). - db_pool = SQLiteMemoryDbPool( - cp_openfun=db_engine.on_new_connection, - ) - yield db_pool.prepare() hs = HomeServer( - name, db_pool=db_pool, config=config, + name, config=config, + db_config=config.database_config, version_string="Synapse/tests", database_engine=db_engine, - get_db_conn=db_pool.get_db_conn, room_list_handler=object(), tls_server_context_factory=Mock(), **kargs ) + yield prepare_database(hs.get_db_conn(), db_engine, config) hs.setup() else: hs = HomeServer( @@ -308,38 +314,6 @@ class MockClock(object): return d -class SQLiteMemoryDbPool(ConnectionPool, object): - def __init__(self, **kwargs): - connkw = { - "cp_min": 1, - "cp_max": 1, - } - connkw.update(kwargs) - - super(SQLiteMemoryDbPool, self).__init__( - "sqlite3", ":memory:", **connkw - ) - - self.config = Mock() - self.config.password_providers = [] - self.config.database_config = {"name": "sqlite3"} - - def prepare(self): - engine = self.create_engine() - return self.runWithConnection( - lambda conn: prepare_database(conn, engine, self.config) - ) - - def get_db_conn(self): - conn = self.connect() - engine = self.create_engine() - prepare_database(conn, engine, self.config) - return conn - - def create_engine(self): - return create_engine(self.config.database_config) - - def _format_call(args, kwargs): return ", ".join( ["%r" % (a) for a in args] + From 88b9c5cbf0e571ba955bbd483a0169a0b79c78c7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 23:54:38 +0000 Subject: [PATCH 133/348] Make it possible to run tests against postgres --- tests/utils.py | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 50de4199be..d1f59551e8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,11 +25,17 @@ from synapse.api.errors import CodeMessageException, cs_error from synapse.federation.transport import server from synapse.http.server import HttpServer from synapse.server import HomeServer +from synapse.storage import PostgresEngine from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database from synapse.util.logcontext import LoggingContext from synapse.util.ratelimitutils import FederationRateLimiter +# set this to True to run the tests against postgres instead of sqlite. 
+# It requires you to have a local postgres database called synapse_test, within +# which ALL TABLES WILL BE DROPPED +USE_POSTGRES_FOR_TESTS = False + @defer.inlineCallbacks def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): @@ -64,14 +70,25 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): if "clock" not in kargs: kargs["clock"] = MockClock() - config.database_config = { - "name": "sqlite3", - "args": { - "database": ":memory:", - "cp_min": 1, - "cp_max": 1, - }, - } + if USE_POSTGRES_FOR_TESTS: + config.database_config = { + "name": "psycopg2", + "args": { + "database": "synapse_test", + "cp_min": 1, + "cp_max": 5, + }, + } + else: + config.database_config = { + "name": "sqlite3", + "args": { + "database": ":memory:", + "cp_min": 1, + "cp_max": 1, + }, + } + db_engine = create_engine(config.database_config) # we need to configure the connection pool to run the on_new_connection @@ -89,7 +106,15 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): tls_server_context_factory=Mock(), **kargs ) - yield prepare_database(hs.get_db_conn(), db_engine, config) + db_conn = hs.get_db_conn() + # make sure that the database is empty + if isinstance(db_engine, PostgresEngine): + cur = db_conn.cursor() + cur.execute("SELECT tablename FROM pg_tables where schemaname='public'") + rows = cur.fetchall() + for r in rows: + cur.execute("DROP TABLE %s CASCADE" % r[0]) + yield prepare_database(db_conn, db_engine, config) hs.setup() else: hs = HomeServer( From 46022025ea35895af3cf8d15973fb94a3a6b4f38 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:20:28 +0000 Subject: [PATCH 134/348] Fix SQL for user search fix some syntax errors for user search when search_all_users is enabled fixes #2801, hopefully --- synapse/storage/user_directory.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index f150ef0103..dfdcbb3181 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,13 +641,12 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # dummy to keep the number of binds & aliases the same + # make s.user_id null to keep the ordering algorithm happy join_clause = """ - LEFT JOIN ( - SELECT NULL as user_id WHERE NULL = ? - ) AS s USING (user_id)" + CROSS JOIN (SELECT NULL as user_id) AS s """ - where_clause = "" + join_args = () + where_clause = "1=1" else: join_clause = """ LEFT JOIN users_in_public_rooms AS p USING (user_id) @@ -656,6 +655,7 @@ class UserDirectoryStore(SQLBaseStore): WHERE user_id = ? AND share_private ) AS s USING (user_id) """ + join_args = (user_id,) where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): @@ -697,7 +697,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, full_query, exact_query, prefix_query, limit + 1,) + args = join_args + (full_query, exact_query, prefix_query, limit + 1,) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -715,7 +715,7 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % (join_clause, where_clause) - args = (user_id, search_query, limit + 1) + args = join_args + (search_query, limit + 1) else: # This should be unreachable. 
raise Exception("Unrecognized database engine") From d1f3490e75df49bf417eeeaef115df16729244ce Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Jan 2018 21:25:03 +0000 Subject: [PATCH 135/348] Add tests for user directory search --- tests/storage/test_user_directory.py | 88 ++++++++++++++++++++++++++++ tests/utils.py | 1 + 2 files changed, 89 insertions(+) create mode 100644 tests/storage/test_user_directory.py diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py new file mode 100644 index 0000000000..0891308f25 --- /dev/null +++ b/tests/storage/test_user_directory.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.storage import UserDirectoryStore +from synapse.storage.roommember import ProfileInfo +from tests import unittest +from tests.utils import setup_test_homeserver + +ALICE = "@alice:a" +BOB = "@bob:b" +BOBBY = "@bobby:a" + + +class UserDirectoryStoreTestCase(unittest.TestCase): + @defer.inlineCallbacks + def setUp(self): + self.hs = yield setup_test_homeserver() + self.store = UserDirectoryStore(None, self.hs) + + # alice and bob are both in !room_id. bobby is not but shares + # a homeserver with alice. + yield self.store.add_profiles_to_user_dir( + "!room:id", + { + ALICE: ProfileInfo(None, "alice"), + BOB: ProfileInfo(None, "bob"), + BOBBY: ProfileInfo(None, "bobby") + }, + ) + yield self.store.add_users_to_public_room( + "!room:id", + [ALICE, BOB], + ) + yield self.store.add_users_who_share_room( + "!room:id", + False, + ( + (ALICE, BOB), + (BOB, ALICE), + ), + ) + + @defer.inlineCallbacks + def test_search_user_dir(self): + # normally when alice searches the directory she should just find + # bob because bobby doesn't share a room with her. 
+ r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + + @defer.inlineCallbacks + def test_search_user_dir_all_users(self): + self.hs.config.user_directory_search_all_users = True + try: + r = yield self.store.search_user_dir(ALICE, "bob", 10) + self.assertFalse(r["limited"]) + self.assertEqual(2, len(r["results"])) + self.assertDictEqual(r["results"][0], { + "user_id": BOB, + "display_name": "bob", + "avatar_url": None, + }) + self.assertDictEqual(r["results"][1], { + "user_id": BOBBY, + "display_name": "bobby", + "avatar_url": None, + }) + finally: + self.hs.config.user_directory_search_all_users = False diff --git a/tests/utils.py b/tests/utils.py index d1f59551e8..8efd3a3475 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -59,6 +59,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): config.email_enable_notifs = False config.block_non_admin_invites = False config.federation_domain_whitelist = None + config.user_directory_search_all_users = False # disable user directory updates, because they get done in the # background, which upsets the test runner. From 02ba118f81009b1c3ae290a17c35e1b9d75e802b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 14:30:15 +0000 Subject: [PATCH 136/348] Increase http conn pool size --- synapse/http/client.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index 930d713013..f3e4973c2e 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -18,6 +18,7 @@ from OpenSSL.SSL import VERIFY_NONE from synapse.api.errors import ( CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, ) +from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable from synapse.util import logcontext import synapse.metrics @@ -67,7 +68,11 @@ class SimpleHttpClient(object): self.hs = hs pool = HTTPConnectionPool(reactor) - pool.maxPersistentPerHost = 5 + + # the pusher makes lots of concurrent SSL connections to sygnal, and + # tends to do so in batches, so we need to allow the pool to keep lots + # of idle connections around. 
+ pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) pool.cachedConnectionTimeout = 2 * 60 # The default context factory in Twisted 14.0.0 (which we require) is From e051abd20b1978ddc53723c8233bc54742243045 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Jan 2018 21:06:54 +0000 Subject: [PATCH 137/348] add appid/device_display_name to to pusher logging --- synapse/push/httppusher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 4a03af5b21..02bd013ca3 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -164,7 +164,7 @@ class HttpPusher(object): logger.info( "Processing %i unprocessed push actions for %s starting at " "stream_ordering %s", - len(unprocessed), self.user_id, self.last_stream_ordering, + len(unprocessed), self.name, self.last_stream_ordering, ) for push_action in unprocessed: @@ -342,7 +342,7 @@ class HttpPusher(object): @defer.inlineCallbacks def _send_badge(self, badge): - logger.info("Sending updated badge count %d to %r", badge, self.user_id) + logger.info("Sending updated badge count %d to %s", badge, self.name) d = { 'notification': { 'id': '', From 03dd745fe28a00c8788a2147d4f5c2a852182429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 15:49:06 +0000 Subject: [PATCH 138/348] Better logging when pushes fail --- synapse/push/httppusher.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 02bd013ca3..2cbac571b8 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -333,7 +333,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, notification_dict) except Exception: - logger.warn("Failed to push %s ", self.url) + logger.warn( + "Failed to push event %s to %s", + event.event_id, self.name, exc_info=True, + ) defer.returnValue(False) rejected = [] if 'rejected' in resp: @@ -364,7 +367,10 @@ class HttpPusher(object): try: resp = yield self.http_client.post_json_get_json(self.url, d) except Exception: - logger.exception("Failed to push %s ", self.url) + logger.warn( + "Failed to send badge count to %s", + self.name, exc_info=True, + ) defer.returnValue(False) rejected = [] if 'rejected' in resp: From b387ee17b68e4398a8fa26fdf122b773a046e429 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 14:00:11 +0000 Subject: [PATCH 139/348] Improve exception handling in persist_event 1. use `deferred.errback()` instead of `deferred.errback(e)`, which means that a Failure object will be constructed using the current exception state, *including* its stack trace - so the stack trace is saved in the Failure, leading to better exception reports. 2. 
Set `consumeErrors=True` on the ObservableDeferred, because we know that there will always be at least one observer - which avoids a spurious "CRITICAL: unhandled exception in Deferred" error in the logs --- synapse/storage/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec90..33fccfa7a8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -110,7 +110,7 @@ class _EventPeristenceQueue(object): end_item.events_and_contexts.extend(events_and_contexts) return end_item.deferred.observe() - deferred = ObservableDeferred(defer.Deferred()) + deferred = ObservableDeferred(defer.Deferred(), consumeErrors=True) queue.append(self._EventPersistQueueItem( events_and_contexts=events_and_contexts, @@ -152,8 +152,8 @@ class _EventPeristenceQueue(object): try: ret = yield per_item_callback(item) item.deferred.callback(ret) - except Exception as e: - item.deferred.errback(e) + except Exception: + item.deferred.errback() finally: queue = self._event_persist_queues.pop(room_id, None) if queue: From d413a2ba981b3e8d443d2ccdd0c8de7d43b62e73 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:10:26 +0000 Subject: [PATCH 140/348] Remove unused "event_type" param on state.get_current_state_ids this param doesn't seem to be used, and is a bit pointless anyway because it can easily be replicated by the caller. It is also horrible, because it changes the return type of the method. --- synapse/state.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3d..18f1edef42 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -146,8 +146,7 @@ class StateHandler(object): defer.returnValue(state) @defer.inlineCallbacks - def get_current_state_ids(self, room_id, event_type=None, state_key="", - latest_event_ids=None): + def get_current_state_ids(self, room_id, latest_event_ids=None): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) @@ -155,10 +154,6 @@ class StateHandler(object): ret = yield self.resolve_state_groups(room_id, latest_event_ids) state = ret.state - if event_type: - defer.returnValue(state.get((event_type, state_key))) - return - defer.returnValue(state) @defer.inlineCallbacks From adfc0c95391bf43d48f7069823fc0f4902803402 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 29 Jan 2018 17:39:55 +0000 Subject: [PATCH 141/348] docstring for get_current_state_ids --- synapse/state.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 18f1edef42..4c8247e7c2 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -147,6 +147,19 @@ class StateHandler(object): @defer.inlineCallbacks def get_current_state_ids(self, room_id, latest_event_ids=None): + """Get the current state, or the state at a set of events, for a room + + Args: + room_id (str): + + latest_event_ids (iterable[str]|None): if given, the forward + extremities to resolve. 
If None, we look them up from the + database (via a cache) + + Returns: + Deferred[dict[(str, str), str)]]: the state dict, mapping from + (event_type, state_key) -> event_id + """ if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) From 773f0eed1efa114bb32f6e54e8edc038a04d3526 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 15:02:51 +0000 Subject: [PATCH 142/348] Fix sql error in quarantine_media --- synapse/storage/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 23688430b7..d91c853070 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -589,7 +589,7 @@ class RoomStore(SQLBaseStore): """ UPDATE remote_media_cache SET quarantined_by = ? - WHERE media_origin AND media_id = ? + WHERE media_origin = ? AND media_id = ? """, ( (quarantined_by, origin, media_id) From af19f5e9aa47dd9875df3937d514ae7708eed539 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 17:52:03 +0000 Subject: [PATCH 143/348] Remove spurious log argument ... which would cause scary-looking and unhelpful errors in the log on dns fail --- synapse/http/endpoint.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index e2b99ef3bd..87639b9151 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -357,8 +357,7 @@ def _get_hosts_for_srv_record(dns_client, host): def eb(res, record_type): if res.check(DNSNameError): return [] - logger.warn("Error looking up %s for %s: %s", - record_type, host, res, res.value) + logger.warn("Error looking up %s for %s: %s", record_type, host, res) return res # no logcontexts here, so we can safely fire these off and gatherResults From 63c4383927cfb759046ccf576e0c7e35a70f6168 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:07:41 -0700 Subject: [PATCH 144/348] Documentation and naming Signed-off-by: Travis Ralston --- synapse/storage/room.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index d1d63f4041..5dfb0e19f7 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -534,8 +534,17 @@ class RoomStore(SQLBaseStore): self.is_room_blocked.invalidate((room_id,)) def get_media_mxcs_in_room(self, room_id): - def _get_media_ids_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. 
+ """ + def _get_media_mxcs_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] @@ -546,14 +555,14 @@ class RoomStore(SQLBaseStore): remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs - return self.runInteraction("get_media_ids_in_room", _get_media_ids_in_room) + return self.runInteraction("get_media_ids_in_room", _get_media_mxcs_in_room_txn) def quarantine_media_ids_in_room(self, room_id, quarantined_by): """For a room loops through all events with media and quarantines the associated media """ - def _quarantine_media_in_room(txn): - local_media_ids, remote_media_ids = self._get_media_ids_in_room(txn, room_id) + def _quarantine_media_in_room_txn(txn): + local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -581,9 +590,19 @@ class RoomStore(SQLBaseStore): return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room) + return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) - def _get_media_ids_in_room(self, txn, room_id): + def _get_media_mxcs_in_room_txn(self, txn, room_id): + """Retrieves all the local and remote media MXC URIs in a given room + + Args: + txn (cursor) + room_id (str) + + Returns: + The local and remote media as a lists of tuples where the key is + the hostname and the value is the media ID. + """ mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)") next_token = self.get_current_events_token() + 1 From 3af53c183a0ab5d30ce0fb40e9b8eee8da7ad75a Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 08:15:59 -0700 Subject: [PATCH 145/348] Add admin api documentation for list media endpoint Signed-off-by: Travis Ralston --- docs/admin_api/media_admin_api.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 docs/admin_api/media_admin_api.md diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md new file mode 100644 index 0000000000..abdbc1ea86 --- /dev/null +++ b/docs/admin_api/media_admin_api.md @@ -0,0 +1,23 @@ +# List all media in a room + +This API gets a list of known media in a room. + +The API is: +``` +GET /_matrix/client/r0/admin/room//media +``` +including an `access_token` of a server admin. 
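A hypothetical invocation, for illustration only: the room ID and token below are placeholders, the homeserver address assumes a local default listener, and the `access_token` is shown as a query parameter.
```
curl 'https://localhost:8448/_matrix/client/r0/admin/room/%21someroom%3Aexample.org/media?access_token=<admin_access_token>'
```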
+ +It returns a JSON body like the following: +``` +{ + "local": [ + "mxc://localhost/xwvutsrqponmlkjihgfedcba", + "mxc://localhost/abcdefghijklmnopqrstuvwx" + ], + "remote": [ + "mxc://matrix.org/xwvutsrqponmlkjihgfedcba", + "mxc://matrix.org/abcdefghijklmnopqrstuvwx" + ] +} +``` From 326189c25a14a2506831e74f6f111754ad656916 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 31 Jan 2018 18:43:54 +0000 Subject: [PATCH 146/348] Script to move remote media to another media store --- scripts/move_remote_media_to_new_store.py | 133 ++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100755 scripts/move_remote_media_to_new_store.py diff --git a/scripts/move_remote_media_to_new_store.py b/scripts/move_remote_media_to_new_store.py new file mode 100755 index 0000000000..7914ead889 --- /dev/null +++ b/scripts/move_remote_media_to_new_store.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2017 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Moves a list of remote media from one media store to another. + +The input should be a list of media files to be moved, one per line. Each line +should be formatted:: + + | + +This can be extracted from postgres with:: + + psql --tuples-only -A -c "select media_origin, filesystem_id from + matrix.remote_media_cache where ..." + +To use, pipe the above into:: + + PYTHON_PATH=. 
./scripts/move_remote_media_to_new_store.py +""" + +from __future__ import print_function + +import argparse +import logging + +import sys + +import os + +import shutil + +from synapse.rest.media.v1.filepath import MediaFilePaths + +logger = logging.getLogger() + + +def main(src_repo, dest_repo): + src_paths = MediaFilePaths(src_repo) + dest_paths = MediaFilePaths(dest_repo) + for line in sys.stdin: + line = line.strip() + parts = line.split('|') + if len(parts) != 2: + print("Unable to parse input line %s" % line, file=sys.stderr) + exit(1) + + move_media(parts[0], parts[1], src_paths, dest_paths) + + +def move_media(origin_server, file_id, src_paths, dest_paths): + """Move the given file, and any thumbnails, to the dest repo + + Args: + origin_server (str): + file_id (str): + src_paths (MediaFilePaths): + dest_paths (MediaFilePaths): + """ + logger.info("%s/%s", origin_server, file_id) + + # check that the original exists + original_file = src_paths.remote_media_filepath(origin_server, file_id) + if not os.path.exists(original_file): + logger.warn( + "Original for %s/%s (%s) does not exist", + origin_server, file_id, original_file, + ) + else: + mkdir_and_move( + original_file, + dest_paths.remote_media_filepath(origin_server, file_id), + ) + + # now look for thumbnails + original_thumb_dir = src_paths.remote_media_thumbnail_dir( + origin_server, file_id, + ) + if not os.path.exists(original_thumb_dir): + return + + mkdir_and_move( + original_thumb_dir, + dest_paths.remote_media_thumbnail_dir(origin_server, file_id) + ) + + +def mkdir_and_move(original_file, dest_file): + dirname = os.path.dirname(dest_file) + if not os.path.exists(dirname): + logger.debug("mkdir %s", dirname) + os.makedirs(dirname) + logger.debug("mv %s %s", original_file, dest_file) + shutil.move(original_file, dest_file) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class = argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-v", action='store_true', help='enable debug logging') + parser.add_argument( + "src_repo", + help="Path to source content repo", + ) + parser.add_argument( + "dest_repo", + help="Path to source content repo", + ) + args = parser.parse_args() + + logging_config = { + "level": logging.DEBUG if args.v else logging.INFO, + "format": "%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(message)s" + } + logging.basicConfig(**logging_config) + + main(args.src_repo, args.dest_repo) From e1e4ec9f9d6570e7f5a3f519113516f47ec872e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:43:40 +0000 Subject: [PATCH 147/348] factor _get_new_state_after_events out of _calculate_state_delta This reduces the scope of a bunch of variables --- synapse/storage/events.py | 57 ++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 33fccfa7a8..dd28c2efe3 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -386,11 +386,18 @@ class EventsStore(SQLBaseStore): if all_single_prev_not_state: continue - state = yield self._calculate_state_delta( - room_id, ev_ctx_rm, new_latest_event_ids + logger.info( + "Calculating state delta for room %s", room_id, ) - if state: - current_state_for_room[room_id] = state + current_state = yield self._get_new_state_after_events( + ev_ctx_rm, new_latest_event_ids, + ) + if current_state is not None: + delta = yield self._calculate_state_delta( + room_id, current_state, 
+ ) + if delta is not None: + current_state_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -467,20 +474,22 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _calculate_state_delta(self, room_id, events_context, new_latest_event_ids): - """Calculate the new state deltas for a room. + def _get_new_state_after_events(self, events_context, new_latest_event_ids): + """Calculate the current state dict after adding some new events to + a room - Assumes that we are only persisting events for one room at a time. + Args: + events_context (list[(EventBase, EventContext)]): + events and contexts which are being added to the room + + new_latest_event_ids (iterable[str]): + the new forward extremities for the room. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, - i.e. (type, state_key) -> event_id. `to_delete` are the entries to - first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. - May return None if there are no changes to be applied. + Deferred[dict[(str,str), str]|None]: + None if there are no changes to the room state, or + a dict of (type, state_key) -> event_id]. """ - # Now we need to work out the different state sets for - # each state extremities state_sets = [] state_groups = set() missing_event_ids = [] @@ -523,12 +532,12 @@ class EventsStore(SQLBaseStore): state_sets.extend(group_to_state.itervalues()) if not new_latest_event_ids: - current_state = {} + defer.returnValue({}) elif was_updated: if len(state_sets) == 1: # If there is only one state set, then we know what the current # state is. - current_state = state_sets[0] + defer.returnValue(state_sets[0]) else: # We work out the current state by passing the state sets to the # state resolution algorithm. It may ask for some events, including @@ -537,8 +546,7 @@ class EventsStore(SQLBaseStore): # up in the db. logger.info( - "Resolving state for %s with %i state sets", - room_id, len(state_sets), + "Resolving state with %i state sets", len(state_sets), ) events_map = {ev.event_id: ev for ev, _ in events_context} @@ -567,9 +575,22 @@ class EventsStore(SQLBaseStore): state_sets, state_map_factory=get_events, ) + defer.returnValue(current_state) else: return + @defer.inlineCallbacks + def _calculate_state_delta(self, room_id, current_state): + """Calculate the new state deltas for a room. + + Assumes that we are only persisting events for one room at a time. + + Returns: + 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + i.e. (type, state_key) -> event_id. `to_delete` are the entries to + first be deleted from current_state_events, `to_insert` are entries + to insert. `new_state` is the full set of state. + """ existing_state = yield self.get_current_state_ids(room_id) existing_events = set(existing_state.itervalues()) From e16e45b1b44c9b9f7d44e6b50406268869759397 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 31 Jan 2018 15:30:38 -0700 Subject: [PATCH 148/348] pep8 Signed-off-by: Travis Ralston --- synapse/storage/room.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 5dfb0e19f7..961ad5abca 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -544,14 +544,14 @@ class RoomStore(SQLBaseStore): the hostname and the value is the media ID. 
""" def _get_media_mxcs_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) local_media_mxcs = [] remote_media_mxcs = [] # Convert the IDs to MXC URIs - for media_id in local_media_ids: + for media_id in local_mxcs: local_media_mxcs.append("mxc://%s/%s" % (self.hostname, media_id)) - for hostname, media_id in remote_media_ids: + for hostname, media_id in remote_mxcs: remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id)) return local_media_mxcs, remote_media_mxcs @@ -562,7 +562,7 @@ class RoomStore(SQLBaseStore): the associated media """ def _quarantine_media_in_room_txn(txn): - local_media_ids, remote_media_ids = self._get_media_mxcs_in_room_txn(txn, room_id) + local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id) total_media_quarantined = 0 # Now update all the tables to set the quarantined_by flag @@ -571,7 +571,7 @@ class RoomStore(SQLBaseStore): UPDATE local_media_repository SET quarantined_by = ? WHERE media_id = ? - """, ((quarantined_by, media_id) for media_id in local_media_ids)) + """, ((quarantined_by, media_id) for media_id in local_mxcs)) txn.executemany( """ @@ -581,16 +581,19 @@ class RoomStore(SQLBaseStore): """, ( (quarantined_by, origin, media_id) - for origin, media_id in remote_media_ids + for origin, media_id in remote_mxcs ) ) - total_media_quarantined += len(local_media_ids) - total_media_quarantined += len(remote_media_ids) + total_media_quarantined += len(local_mxcs) + total_media_quarantined += len(remote_mxcs) return total_media_quarantined - return self.runInteraction("quarantine_media_in_room", _quarantine_media_in_room_txn) + return self.runInteraction( + "quarantine_media_in_room", + _quarantine_media_in_room_txn, + ) def _get_media_mxcs_in_room_txn(self, txn, room_id): """Retrieves all the local and remote media MXC URIs in a given room From 0cbda53819dd66df05c872ea021767336b457769 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 08:48:41 +0000 Subject: [PATCH 149/348] Rename resolve_state_groups -> resolve_state_groups_for_events (to make way for a method that actually just does the state group resolution) --- synapse/handlers/federation.py | 11 +++++------ synapse/state.py | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 677532c87b..8ee9434c9b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -808,13 +808,12 @@ class FederationHandler(BaseHandler): event_ids = list(extremities.keys()) logger.debug("calling resolve_state_groups in _maybe_backfill") + resolve = logcontext.preserve_fn( + self.state_handler.resolve_state_groups_for_events + ) states = yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.preserve_fn(self.state_handler.resolve_state_groups)( - room_id, [e] - ) - for e in event_ids - ], consumeErrors=True, + [resolve(room_id, [e]) for e in event_ids], + consumeErrors=True, )) states = dict(zip(event_ids, [s.state for s in states])) diff --git a/synapse/state.py b/synapse/state.py index 4c8247e7c2..8daf900133 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -127,7 +127,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state") - ret = yield self.resolve_state_groups(room_id, 
latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state if event_type: @@ -164,7 +164,7 @@ class StateHandler(object): latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_state_ids") - ret = yield self.resolve_state_groups(room_id, latest_event_ids) + ret = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) state = ret.state defer.returnValue(state) @@ -174,7 +174,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_user_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_users = yield self.store.get_joined_users_from_state(room_id, entry) defer.returnValue(joined_users) @@ -183,7 +183,7 @@ class StateHandler(object): if not latest_event_ids: latest_event_ids = yield self.store.get_latest_event_ids_in_room(room_id) logger.debug("calling resolve_state_groups from get_current_hosts_in_room") - entry = yield self.resolve_state_groups(room_id, latest_event_ids) + entry = yield self.resolve_state_groups_for_events(room_id, latest_event_ids) joined_hosts = yield self.store.get_joined_hosts(room_id, entry) defer.returnValue(joined_hosts) @@ -241,7 +241,7 @@ class StateHandler(object): defer.returnValue(context) logger.debug("calling resolve_state_groups from compute_event_context") - entry = yield self.resolve_state_groups( + entry = yield self.resolve_state_groups_for_events( event.room_id, [e for e, _ in event.prev_events], ) @@ -284,7 +284,7 @@ class StateHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, event_ids): + def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. From 6da4c4d3bdceb99f20bc208dc4809d3d76ba9a72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 09:15:45 +0000 Subject: [PATCH 150/348] Factor out resolve_state_groups to a separate handler We extract the storage-independent bits of the state group resolution out to a separate functiom, and stick it in a new handler, in preparation for its use from the storage layer. 
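To make the intent of this refactor concrete, the sketch below shows the storage-independent shape being aimed for: the resolver works purely on `(type, state_key) -> event_id` dicts and only obtains event bodies through an injected callback, mirroring the `state_map_factory` argument in the diff that follows. The function name, the toy depth-based tie-break and the in-memory "store" are illustrative assumptions, not Synapse's actual resolution algorithm.

```
# Illustrative only: a simplified, storage-independent state resolver. The
# injected state_map_factory callback stands in for the datastore, so the
# function itself never touches the storage layer.
def resolve_state_groups_sketch(state_groups_ids, state_map_factory):
    # state_groups_ids: dict[group_id, dict[(type, state_key), event_id]]
    # (assumes at least one state set is supplied)
    state_sets = list(state_groups_ids.values())
    unconflicted = dict(state_sets[0])
    conflicted = {}
    for state_set in state_sets[1:]:
        for key, event_id in state_set.items():
            if key in conflicted:
                conflicted[key].add(event_id)
            elif key not in unconflicted:
                unconflicted[key] = event_id
            elif unconflicted[key] != event_id:
                conflicted[key] = {unconflicted.pop(key), event_id}

    if not conflicted:
        return unconflicted

    # Event bodies are only needed for conflicted keys, and only via the
    # injected callback.
    needed = set().union(*conflicted.values())
    event_map = state_map_factory(needed)
    resolved = dict(unconflicted)
    for key, event_ids in conflicted.items():
        # Toy tie-break on depth; real Matrix state resolution is much richer.
        resolved[key] = max(event_ids, key=lambda e: event_map[e].get("depth", 0))
    return resolved


# Hypothetical usage with an in-memory "store":
fake_events = {"$a": {"depth": 1}, "$b": {"depth": 2}}
groups = {
    1: {("m.room.name", ""): "$a"},
    2: {("m.room.name", ""): "$b"},
}
print(resolve_state_groups_sketch(
    groups, lambda ids: {e: fake_events[e] for e in ids},
))
# -> {('m.room.name', ''): '$b'}
```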
--- synapse/server.py | 6 +- synapse/server.pyi | 3 + synapse/state.py | 149 ++++++++++++++++++++++++++++---------------- tests/test_state.py | 4 +- 4 files changed, 108 insertions(+), 54 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index ff8a8fbc46..3173aed1d0 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -66,7 +66,7 @@ from synapse.rest.media.v1.media_repository import ( MediaRepository, MediaRepositoryResource, ) -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from synapse.storage import DataStore from synapse.streams.events import EventSources from synapse.util import Clock @@ -102,6 +102,7 @@ class HomeServer(object): 'v1auth', 'auth', 'state_handler', + 'state_resolution_handler', 'presence_handler', 'sync_handler', 'typing_handler', @@ -224,6 +225,9 @@ class HomeServer(object): def build_state_handler(self): return StateHandler(self) + def build_state_resolution_handler(self): + return StateResolutionHandler(self) + def build_presence_handler(self): return PresenceHandler(self) diff --git a/synapse/server.pyi b/synapse/server.pyi index 41416ef252..c3a9a3847b 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -34,6 +34,9 @@ class HomeServer(object): def get_state_handler(self) -> synapse.state.StateHandler: pass + def get_state_resolution_handler(self) -> synapse.state.StateResolutionHandler: + pass + def get_deactivate_account_handler(self) -> synapse.handlers.deactivate_account.DeactivateAccountHandler: pass diff --git a/synapse/state.py b/synapse/state.py index 8daf900133..5004e0f913 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -81,31 +81,19 @@ class _StateCacheEntry(object): class StateHandler(object): - """ Responsible for doing state conflict resolution. + """Fetches bits of state from the stores, and does state resolution + where necessary """ def __init__(self, hs): self.clock = hs.get_clock() self.store = hs.get_datastore() self.hs = hs - - # dict of set of event_ids -> _StateCacheEntry. - self._state_cache = None - self.resolve_linearizer = Linearizer(name="state_resolve_lock") + self._state_resolution_handler = hs.get_state_resolution_handler() def start_caching(self): - logger.debug("start_caching") - - self._state_cache = ExpiringCache( - cache_name="state_cache", - clock=self.clock, - max_len=SIZE_OF_CACHE, - expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, - iterable=True, - reset_expiry_on_get=True, - ) - - self._state_cache.start() + # TODO: remove this shim + self._state_resolution_handler.start_caching() @defer.inlineCallbacks def get_current_state(self, room_id, event_type=None, state_key="", @@ -283,7 +271,6 @@ class StateHandler(object): defer.returnValue(context) @defer.inlineCallbacks - @log_function def resolve_state_groups_for_events(self, room_id, event_ids): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. 
@@ -303,13 +290,7 @@ class StateHandler(object): room_id, event_ids ) - logger.debug( - "resolve_state_groups state_groups %s", - state_groups_ids.keys() - ) - - group_names = frozenset(state_groups_ids.keys()) - if len(group_names) == 1: + if len(state_groups_ids) == 1: name, state_list = state_groups_ids.items().pop() prev_group, delta_ids = yield self.store.get_state_group_delta(name) @@ -321,6 +302,92 @@ class StateHandler(object): delta_ids=delta_ids, )) + result = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups_ids, self._state_map_factory, + ) + defer.returnValue(result) + + def _state_map_factory(self, ev_ids): + return self.store.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + + def resolve_events(self, state_sets, event): + logger.info( + "Resolving state for %s with %d groups", event.room_id, len(state_sets) + ) + state_set_ids = [{ + (ev.type, ev.state_key): ev.event_id + for ev in st + } for st in state_sets] + + state_map = { + ev.event_id: ev + for st in state_sets + for ev in st + } + + with Measure(self.clock, "state._resolve_events"): + new_state = resolve_events_with_state_map(state_set_ids, state_map) + + new_state = { + key: state_map[ev_id] for key, ev_id in new_state.items() + } + + return new_state + + +class StateResolutionHandler(object): + """Responsible for doing state conflict resolution. + + Note that the storage layer depends on this handler, so all functions must + be storage-independent. + """ + def __init__(self, hs): + self.clock = hs.get_clock() + + # dict of set of event_ids -> _StateCacheEntry. + self._state_cache = None + self.resolve_linearizer = Linearizer(name="state_resolve_lock") + + def start_caching(self): + logger.debug("start_caching") + + self._state_cache = ExpiringCache( + cache_name="state_cache", + clock=self.clock, + max_len=SIZE_OF_CACHE, + expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, + iterable=True, + reset_expiry_on_get=True, + ) + + self._state_cache.start() + + @defer.inlineCallbacks + @log_function + def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + """Resolves conflicts between a set of state groups + + Always generates a new state group (unless we hit the cache), so should + not be called for a single state group + + Args: + room_id (str): room we are resolving for (used for logging) + state_groups_ids (dict[int, dict[(str, str), str]]): + map from state group id to the state in that state group + (where 'state' is a map from state key to event id) + + Returns: + Deferred[_StateCacheEntry]: resolved state + """ + logger.debug( + "resolve_state_groups state_groups %s", + state_groups_ids.keys() + ) + + group_names = frozenset(state_groups_ids.keys()) + with (yield self.resolve_linearizer.queue(group_names)): if self._state_cache is not None: cache = self._state_cache.get(group_names, None) @@ -351,15 +418,17 @@ class StateHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), - state_map_factory=lambda ev_ids: self.store.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ), + state_map_factory=state_map_factory, ) else: new_state = { key: e_ids.pop() for key, e_ids in state.items() } + # if the new state matches any of the input state groups, we can + # use that state group again. Otherwise we will generate a state_id + # which will be used as a cache key for future resolutions, but + # not get persisted. 
state_group = None new_state_event_ids = frozenset(new_state.values()) for sg, events in state_groups_ids.items(): @@ -396,30 +465,6 @@ class StateHandler(object): defer.returnValue(cache) - def resolve_events(self, state_sets, event): - logger.info( - "Resolving state for %s with %d groups", event.room_id, len(state_sets) - ) - state_set_ids = [{ - (ev.type, ev.state_key): ev.event_id - for ev in st - } for st in state_sets] - - state_map = { - ev.event_id: ev - for st in state_sets - for ev in st - } - - with Measure(self.clock, "state._resolve_events"): - new_state = resolve_events_with_state_map(state_set_ids, state_map) - - new_state = { - key: state_map[ev_id] for key, ev_id in new_state.items() - } - - return new_state - def _ordered_events(events): def key_func(e): diff --git a/tests/test_state.py b/tests/test_state.py index feb84f3d48..d16e1b3b8b 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -19,7 +19,7 @@ from twisted.internet import defer from synapse.events import FrozenEvent from synapse.api.auth import Auth from synapse.api.constants import EventTypes, Membership -from synapse.state import StateHandler +from synapse.state import StateHandler, StateResolutionHandler from .utils import MockClock @@ -148,11 +148,13 @@ class StateTestCase(unittest.TestCase): ) hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", + "get_state_resolution_handler", ]) hs.get_datastore.return_value = self.store hs.get_state_handler.return_value = None hs.get_clock.return_value = MockClock() hs.get_auth.return_value = Auth(hs) + hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) self.store.get_next_state_group.side_effect = Mock self.store.get_state_group_delta.return_value = (None, None) From a18828c12961ce254660e7641335428ec0655868 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:33:22 +0000 Subject: [PATCH 151/348] Fix docstring for StateHandler.resolve_state_groups The return type was a complete lie, so fix it --- synapse/state.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 1f9abf9d3d..2bc7e584c3 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -58,7 +58,11 @@ class _StateCacheEntry(object): __slots__ = ["state", "state_group", "state_id", "prev_group", "delta_ids"] def __init__(self, state, state_group, prev_group=None, delta_ids=None): + # dict[(str, str), str] map from (type, state_key) to event_id self.state = frozendict(state) + + # the ID of a state group if one and only one is involved. + # otherwise, None otherwise? self.state_group = state_group self.prev_group = prev_group @@ -280,11 +284,12 @@ class StateHandler(object): """ Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. + Args: + room_id (str): + event_ids (list[str]): + Returns: - a Deferred tuple of (`state_group`, `state`, `prev_state`). - `state_group` is the name of a state group if one and only one is - involved. `state` is a map from (type, state_key) to event, and - `prev_state` is a list of event ids. 
+ Deferred[_StateCacheEntry]: resolved state """ logger.debug("resolve_state_groups event_ids %s", event_ids) From e15d4ea2482827035e31945f514b9d4c67086c37 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 17:36:45 +0000 Subject: [PATCH 152/348] More docstring fixes Fix a couple of errors in docstrings --- synapse/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 2bc7e584c3..1071635ac4 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -434,8 +434,8 @@ def resolve_events_with_state_map(state_sets, state_map): state_sets. Returns - dict[(str, str), synapse.events.FrozenEvent]: - a map from (type, state_key) to event. + dict[(str, str), str]: + a map from (type, state_key) to event_id. """ if len(state_sets) == 1: return state_sets[0] @@ -497,8 +497,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): a Deferred of dict of event_id to event. Returns - Deferred[dict[(str, str), synapse.events.FrozenEvent]]: - a map from (type, state_key) to event. + Deferred[dict[(str, str), str]]: + a map from (type, state_key) to event_id. """ if len(state_sets) == 1: defer.returnValue(state_sets[0]) From 14737ba495d914daf350a80c9b34dfefb0ae5a86 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 19 Jan 2018 23:55:01 +0000 Subject: [PATCH 153/348] doc arg types for _seperate --- synapse/state.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/synapse/state.py b/synapse/state.py index 1071635ac4..30b16e201a 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -457,6 +457,21 @@ def _seperate(state_sets): """Takes the state_sets and figures out which keys are conflicted and which aren't. i.e., which have multiple different event_ids associated with them in different state sets. + + Args: + state_sets(list[dict[(str, str), str]]): + List of dicts of (type, state_key) -> event_id, which are the + different state groups to resolve. + + Returns: + (dict[(str, str), str], dict[(str, str), set[str]]): + A tuple of (unconflicted_state, conflicted_state), where: + + unconflicted_state is a dict mapping (type, state_key)->event_id + for unconflicted state keys. + + conflicted_state is a dict mapping (type, state_key) to a set of + event ids for conflicted state keys. """ unconflicted_state = dict(state_sets[0]) conflicted_state = {} From d5352cbba80005fb226563211c9e7179edef2d65 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 00:35:18 +0000 Subject: [PATCH 154/348] Handle url_previews with no content-type avoid failing with an exception if the remote server doesn't give us a Content-Type header. Also, clean up the exception handling a bit. --- synapse/rest/media/v1/preview_url_resource.py | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 981f01e417..31fe7aa75c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -12,6 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import cgi +import datetime +import errno +import fnmatch +import itertools +import logging +import os +import re +import shutil +import sys +import traceback +import ujson as json +import urlparse from twisted.web.server import NOT_DONE_YET from twisted.internet import defer @@ -33,18 +46,6 @@ from synapse.http.server import ( from synapse.util.async import ObservableDeferred from synapse.util.stringutils import is_ascii -import os -import re -import fnmatch -import cgi -import ujson as json -import urlparse -import itertools -import datetime -import errno -import shutil - -import logging logger = logging.getLogger(__name__) @@ -286,17 +287,28 @@ class PreviewUrlResource(Resource): url_cache=True, ) - try: - with self.media_storage.store_into_file(file_info) as (f, fname, finish): + with self.media_storage.store_into_file(file_info) as (f, fname, finish): + try: logger.debug("Trying to get url '%s'" % url) length, headers, uri, code = yield self.client.get_file( url, output_stream=f, max_size=self.max_spider_size, ) + except Exception as e: # FIXME: pass through 404s and other error messages nicely + logger.warn("Error downloading %s: %r", url, e) + raise SynapseError( + 500, "Failed to download content: %s" % ( + traceback.format_exception_only(sys.exc_type, e), + ), + Codes.UNKNOWN, + ) + yield finish() - yield finish() - - media_type = headers["Content-Type"][0] + try: + if "Content-Type" in headers: + media_type = headers["Content-Type"][0] + else: + media_type = "application/octet-stream" time_now_ms = self.clock.time_msec() content_disposition = headers.get("Content-Disposition", None) @@ -336,10 +348,11 @@ class PreviewUrlResource(Resource): ) except Exception as e: - raise SynapseError( - 500, ("Failed to download content: %s" % e), - Codes.UNKNOWN - ) + logger.error("Error handling downloaded %s: %r", url, e) + # TODO: we really ought to delete the downloaded file in this + # case, since we won't have recorded it in the db, and will + # therefore not expire it. + raise defer.returnValue({ "media_type": media_type, From 6b02fc80d173d3d4de81623d411a136abe1637e9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 14:32:51 +0000 Subject: [PATCH 155/348] Reinstate event_search_postgres_gist handler People may have queued updates for this, so we can't just delete it. --- synapse/storage/background_updates.py | 19 +++++++++++++++++++ synapse/storage/registration.py | 7 +------ synapse/storage/search.py | 11 +++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 11a1b942f1..c88759bf2c 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -242,6 +242,25 @@ class BackgroundUpdateStore(SQLBaseStore): """ self._background_update_handlers[update_name] = update_handler + def register_noop_background_update(self, update_name): + """Register a noop handler for a background update. + + This is useful when we previously did a background update, but no + longer wish to do the update. In this case the background update should + be removed from the schema delta files, but there may still be some + users who have the background update queued, so this method should + also be called to clear the update. 
+ + Args: + update_name (str): Name of update + """ + @defer.inlineCallbacks + def noop_update(progress, batch_size): + yield self._end_background_update(update_name) + defer.returnValue(1) + + self.register_background_update_handler(update_name, noop_update) + def register_background_index_update(self, update_name, index_name, table, columns, where_clause=None, unique=False, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 3aa810981f..95f75d6df1 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -39,12 +39,7 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): # we no longer use refresh tokens, but it's possible that some people # might have a background update queued to build this index. Just # clear the background update. - @defer.inlineCallbacks - def noop_update(progress, batch_size): - yield self._end_background_update("refresh_tokens_device_index") - defer.returnValue(1) - self.register_background_update_handler( - "refresh_tokens_device_index", noop_update) + self.register_noop_background_update("refresh_tokens_device_index") @defer.inlineCallbacks def add_access_token_to_user(self, user_id, token, device_id=None): diff --git a/synapse/storage/search.py b/synapse/storage/search.py index d3e76b58d6..13c827cf87 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -31,6 +31,7 @@ class SearchStore(BackgroundUpdateStore): EVENT_SEARCH_UPDATE_NAME = "event_search" EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" + EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist" EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" def __init__(self, db_conn, hs): @@ -42,6 +43,16 @@ class SearchStore(BackgroundUpdateStore): self.EVENT_SEARCH_ORDER_UPDATE_NAME, self._background_reindex_search_order ) + + # we used to have a background update to turn the GIN index into a + # GIST one; we no longer do that (obviously) because we actually want + # a GIN index. However, it's possible that some people might still have + # the background update queued, so we register a handler to clear the + # background update. + self.register_noop_background_update( + self.EVENT_SEARCH_USE_GIST_POSTGRES_NAME, + ) + self.register_background_update_handler( self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search From 4eeae7ad657729eb8c2765da6fb40fc983c740f7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: [PATCH 156/348] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. 
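The `register_noop_background_update` helper from patch 155 above exists because a deployment may still have the update queued in its database even though the code that wanted it has been removed, so something must claim the entry and mark it finished. The toy, in-memory sketch below illustrates that idea; the class and its queue are stand-ins, not Synapse's BackgroundUpdateStore, where completion is instead recorded via `_end_background_update` inside the deferred machinery.

```
# Illustrative only: a tiny in-memory stand-in for the background-update
# machinery, showing how a no-op handler drains an update that is still
# queued even though its real handler has been removed.
class ToyBackgroundUpdater(object):
    def __init__(self, queued_update_names):
        self.queued = list(queued_update_names)  # stand-in for the DB queue
        self.handlers = {}

    def register_background_update_handler(self, name, handler):
        self.handlers[name] = handler

    def register_noop_background_update(self, name):
        def noop_update(progress, batch_size):
            # Do no work; just mark the update as complete.
            self.queued.remove(name)
            return 1
        self.register_background_update_handler(name, noop_update)

    def run_pending(self):
        for name in list(self.queued):
            self.handlers[name](progress={}, batch_size=100)


updater = ToyBackgroundUpdater(["event_search_postgres_gist"])
updater.register_noop_background_update("event_search_postgres_gist")
updater.run_pending()
assert updater.queued == []
```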
--- synapse/storage/room.py | 45 +++++++-------------------------------- synapse/storage/search.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 9f373b47e0..0fcfb7f86d 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,33 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - txn.execute("RESET work_mem") - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f52f3c8592..205e8d0017 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -246,6 +246,41 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + txn.execute("SET work_mem='256kB'") + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From bd25f9cf36ff86d1616853d88cebd2a4a83fa552 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:05:41 +0000 Subject: [PATCH 157/348] Clean up work_mem handling Add some comments and improve exception handling when twiddling work_mem for the search update --- synapse/storage/search.py | 52 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 205e8d0017..190751bade 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import sys from twisted.internet import defer from .background_updates import BackgroundUpdateStore @@ -256,21 +256,51 @@ class SearchStore(BackgroundUpdateStore): value (str): """ if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") sql = ( "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " (event_id, room_id, key, vector, stream_ordering, " + " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, + + # inserts to a GIN index are normally batched up into a pending + # list, and then all committed together once the list gets to a + # certain size. The trouble with that is that postgres (pre-9.5) + # uses work_mem to determine the length of the list, and work_mem + # is typically very large. + # + # We therefore reduce work_mem while we do the insert. 
+ # + # (postgres 9.5 uses the separate gin_pending_list_limit setting, + # so doesn't suffer the same problem, but changing work_mem will + # be harmless) + + txn.execute("SET work_mem='256kB'") + try: + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) ) - ) - txn.execute("RESET work_mem") + except Exception: + # we need to reset work_mem, but doing so may throw a new + # exception and we want to preserve the original + t, v, tb = sys.exc_info() + try: + txn.execute("RESET work_mem") + except Exception as e: + logger.warn( + "exception resetting work_mem during exception " + "handling: %r", + e, + ) + raise t, v, tb + else: + txn.execute("RESET work_mem") + elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" From 80b8a28100e29e34bdc6226513575789310aa41f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 158/348] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 89 ++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 190751bade..eecf778516 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from collections import namedtuple import sys from twisted.internet import defer @@ -26,6 +27,11 @@ import ujson as json logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. 
rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,29 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - txn.execute("SET work_mem='256kB'") - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) - - if isinstance(self.database_engine, PostgresEngine): - txn.execute("RESET work_mem") + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -255,6 +257,26 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" @@ -262,6 +284,10 @@ class SearchStore(BackgroundUpdateStore): " origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) # inserts to a GIN index are normally batched up into a pending # list, and then all committed together once the list gets to a @@ -277,14 +303,7 @@ class SearchStore(BackgroundUpdateStore): txn.execute("SET work_mem='256kB'") try: - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + txn.executemany(sql, args) except Exception: # we need to reset work_mem, but doing so may throw a new # exception and we want to preserve the original @@ -306,7 +325,11 @@ class SearchStore(BackgroundUpdateStore): "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + 
txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") From b5049d2e5cab5780c08c61b9d8ac70c6a9298fb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:06:46 +0000 Subject: [PATCH 159/348] Add .vscode to gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 491047c352..c8901eb206 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ static/client/register/register_config.js env/ *.config + +.vscode/ From a1beca0e254104ad87220d276fb56d26e5ca5809 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 12:44:03 +0000 Subject: [PATCH 160/348] Fix broken unit test for media storage --- tests/rest/media/v1/test_media_storage.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index c4de181579..eef38b6781 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -21,6 +21,8 @@ from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend +from mock import Mock + from tests import unittest import os @@ -35,8 +37,11 @@ class MediaStorageTests(unittest.TestCase): self.primary_base_path = os.path.join(self.test_dir, "primary") self.secondary_base_path = os.path.join(self.test_dir, "secondary") + hs = Mock() + hs.config.media_store_path = self.primary_base_path + storage_providers = [FileStorageProviderBackend( - self.primary_base_path, self.secondary_base_path + hs, self.secondary_base_path )] self.filepaths = MediaFilePaths(self.primary_base_path) From bc496df192fa20dee933590d5f21a3425388c0d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Feb 2018 17:57:51 +0000 Subject: [PATCH 161/348] report metrics on number of cache evictions --- synapse/metrics/metric.py | 11 ++++++++++- synapse/util/caches/descriptors.py | 4 ++++ synapse/util/caches/expiringcache.py | 6 +++++- synapse/util/caches/lrucache.py | 28 +++++++++++++++++++++++++--- tests/metrics/test_metric.py | 12 ++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 1e783e5ff4..ff5aa8c0e1 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -193,7 +193,9 @@ class DistributionMetric(object): class CacheMetric(object): - __slots__ = ("name", "cache_name", "hits", "misses", "size_callback") + __slots__ = ( + "name", "cache_name", "hits", "misses", "evicted_size", "size_callback", + ) def __init__(self, name, size_callback, cache_name): self.name = name @@ -201,6 +203,7 @@ class CacheMetric(object): self.hits = 0 self.misses = 0 + self.evicted_size = 0 self.size_callback = size_callback @@ -210,6 +213,9 @@ class CacheMetric(object): def inc_misses(self): self.misses += 1 + def inc_evictions(self, size=1): + self.evicted_size += size + def render(self): size = self.size_callback() hits = self.hits @@ -219,6 +225,9 @@ class CacheMetric(object): """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits), """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total), """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size), + """%s:evicted_size{name="%s"} %d""" % ( + self.name, self.cache_name, self.evicted_size + ), ] diff --git a/synapse/util/caches/descriptors.py 
b/synapse/util/caches/descriptors.py index af65bfe7b8..bf3a66eae4 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -75,6 +75,7 @@ class Cache(object): self.cache = LruCache( max_size=max_entries, keylen=keylen, cache_type=cache_type, size_callback=(lambda d: len(d)) if iterable else None, + evicted_callback=self._on_evicted, ) self.name = name @@ -83,6 +84,9 @@ class Cache(object): self.thread = None self.metrics = register_cache(name, self.cache) + def _on_evicted(self, evicted_count): + self.metrics.inc_evictions(evicted_count) + def check_thread(self): expected_thread = self.thread if expected_thread is None: diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 6ad53a6390..0aa103eecb 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -79,7 +79,11 @@ class ExpiringCache(object): while self._max_len and len(self) > self._max_len: _key, value = self._cache.popitem(last=False) if self.iterable: - self._size_estimate -= len(value.value) + removed_len = len(value.value) + self.metrics.inc_evictions(removed_len) + self._size_estimate -= removed_len + else: + self.metrics.inc_evictions() def __getitem__(self, key): try: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index cf5fbb679c..f088dd430e 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -49,7 +49,24 @@ class LruCache(object): Can also set callbacks on objects when getting/setting which are fired when that key gets invalidated/evicted. """ - def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None): + def __init__(self, max_size, keylen=1, cache_type=dict, size_callback=None, + evicted_callback=None): + """ + Args: + max_size (int): + + keylen (int): + + cache_type (type): + type of underlying cache to be used. Typically one of dict + or TreeCache. + + size_callback (func(V) -> int | None): + + evicted_callback (func(int)|None): + if not None, called on eviction with the size of the evicted + entry + """ cache = cache_type() self.cache = cache # Used for introspection. 
list_root = _Node(None, None, None, None) @@ -61,8 +78,10 @@ class LruCache(object): def evict(): while cache_len() > max_size: todelete = list_root.prev_node - delete_node(todelete) + evicted_len = delete_node(todelete) cache.pop(todelete.key, None) + if evicted_callback: + evicted_callback(evicted_len) def synchronized(f): @wraps(f) @@ -111,12 +130,15 @@ class LruCache(object): prev_node.next_node = next_node next_node.prev_node = prev_node + deleted_len = 1 if size_callback: - cached_cache_len[0] -= size_callback(node.value) + deleted_len = size_callback(node.value) + cached_cache_len[0] -= deleted_len for cb in node.callbacks: cb() node.callbacks.clear() + return deleted_len @synchronized def cache_get(key, default=None, callbacks=[]): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index f85455a5af..39bde6e3f8 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -141,6 +141,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 0', 'cache:size{name="cache_name"} 0', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_misses() @@ -150,6 +151,7 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 0', 'cache:total{name="cache_name"} 1', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', ]) metric.inc_hits() @@ -158,4 +160,14 @@ class CacheMetricTestCase(unittest.TestCase): 'cache:hits{name="cache_name"} 1', 'cache:total{name="cache_name"} 2', 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 0', + ]) + + metric.inc_evictions(2) + + self.assertEquals(metric.render(), [ + 'cache:hits{name="cache_name"} 1', + 'cache:total{name="cache_name"} 2', + 'cache:size{name="cache_name"} 1', + 'cache:evicted_size{name="cache_name"} 2', ]) From c46e75d3d8311f378f234e3de4719d6fa5d380c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 22:57:33 +0000 Subject: [PATCH 162/348] Move store_event_search_txn to SearchStore ... as a precursor to making event storing and doing the bg update share some code. 
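For readers following the refactor: after this patch, RoomStore reaches the search-insert helper through inheritance from SearchStore rather than via a private sibling method. A minimal sketch of the resulting call pattern, condensed from the hunks below (illustrative only, not part of the patch itself):

from synapse.storage.search import SearchStore

class RoomStore(SearchStore):
    def _store_room_name_txn(self, txn, event):
        # Index the room name under the "content.name" search key using the
        # helper that now lives on SearchStore.
        self.store_event_search_txn(
            txn, event, "content.name", event.content["name"],
        )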
--- synapse/storage/room.py | 43 ++++++++------------------------------- synapse/storage/search.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index cf2c4dae39..fff6652e05 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore -from .engines import PostgresEngine, Sqlite3Engine - import collections import logging import ujson as json @@ -40,7 +38,7 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SQLBaseStore): +class RoomStore(SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -263,8 +261,8 @@ class RoomStore(SQLBaseStore): }, ) - self._store_event_search_txn( - txn, event, "content.topic", event.content["topic"] + self.store_event_search_txn( + txn, event, "content.topic", event.content["topic"], ) def _store_room_name_txn(self, txn, event): @@ -279,14 +277,14 @@ class RoomStore(SQLBaseStore): } ) - self._store_event_search_txn( - txn, event, "content.name", event.content["name"] + self.store_event_search_txn( + txn, event, "content.name", event.content["name"], ) def _store_room_message_txn(self, txn, event): if hasattr(event, "content") and "body" in event.content: - self._store_event_search_txn( - txn, event, "content.body", event.content["body"] + self.store_event_search_txn( + txn, event, "content.body", event.content["body"], ) def _store_history_visibility_txn(self, txn, event): @@ -308,31 +306,6 @@ class RoomStore(SQLBaseStore): event.content[key] )) - def _store_event_search_txn(self, txn, event, key, value): - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search" - " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" - " VALUES (?,?,?,to_tsvector('english', ?),?,?)" - ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - def add_event_report(self, room_id, event_id, user_id, reason, content, received_ts): next_id = self._event_reports_id_gen.get_next() diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 479b04c636..4f38a587c8 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -242,6 +242,39 @@ class SearchStore(BackgroundUpdateStore): defer.returnValue(num_rows) + def store_event_search_txn(self, txn, event, key, value): + """Add event to the search table + + Args: + txn (cursor): + event (EventBase): + key (str): + value (str): + """ + if isinstance(self.database_engine, PostgresEngine): + sql = ( + "INSERT INTO event_search" + " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" + " VALUES (?,?,?,to_tsvector('english', ?),?,?)" + ) + txn.execute( + sql, + ( + event.event_id, event.room_id, key, value, + event.internal_metadata.stream_ordering, + event.origin_server_ts, + ) + ) + elif isinstance(self.database_engine, Sqlite3Engine): + sql = ( + "INSERT INTO event_search (event_id, room_id, key, value)" + " VALUES (?,?,?,?)" + ) + txn.execute(sql, (event.event_id, event.room_id, key, value,)) + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + @defer.inlineCallbacks def search_msgs(self, room_ids, search_term, keys): """Performs a full text search over events with given keys. From 5ff3d23564f41e3ae82398a2f0726d8914d060a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:51:53 +0000 Subject: [PATCH 163/348] Split event creation into a separate handler --- synapse/handlers/message.py | 368 +++++++++++++++++++----------------- 1 file changed, 191 insertions(+), 177 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd2..afa19bf653 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -47,21 +47,9 @@ class MessageHandler(BaseHandler): self.hs = hs self.state = hs.get_state_handler() self.clock = hs.get_clock() - self.validator = EventValidator() - self.profile_handler = hs.get_profile_handler() self.pagination_lock = ReadWriteLock() - self.pusher_pool = hs.get_pusherpool() - - # We arbitrarily limit concurrent event creation for a room to 5. - # This is to stop us from diverging history *too* much. - self.limiter = Limiter(max_count=5) - - self.action_generator = hs.get_action_generator() - - self.spam_checker = hs.get_spam_checker() - @defer.inlineCallbacks def purge_history(self, room_id, event_id): event = yield self.store.get_event(event_id) @@ -182,166 +170,6 @@ class MessageHandler(BaseHandler): defer.returnValue(chunk) - @defer.inlineCallbacks - def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_event_ids=None): - """ - Given a dict from a client, create a new event. - - Creates an FrozenEvent object, filling out auth_events, prev_events, - etc. - - Adds display names to Join membership events. 
- - Args: - requester - event_dict (dict): An entire event - token_id (str) - txn_id (str) - prev_event_ids (list): The prev event ids to use when creating the event - - Returns: - Tuple of created event (FrozenEvent), Context - """ - builder = self.event_builder_factory.new(event_dict) - - with (yield self.limiter.queue(builder.room_id)): - self.validator.validate_new(builder) - - if builder.type == EventTypes.Member: - membership = builder.content.get("membership", None) - target = UserID.from_string(builder.state_key) - - if membership in {Membership.JOIN, Membership.INVITE}: - # If event doesn't include a display name, add one. - profile = self.profile_handler - content = builder.content - - try: - if "displayname" not in content: - content["displayname"] = yield profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = yield profile.get_avatar_url(target) - except Exception as e: - logger.info( - "Failed to get profile information for %r: %s", - target, e - ) - - if token_id is not None: - builder.internal_metadata.token_id = token_id - - if txn_id is not None: - builder.internal_metadata.txn_id = txn_id - - event, context = yield self._create_new_client_event( - builder=builder, - requester=requester, - prev_event_ids=prev_event_ids, - ) - - defer.returnValue((event, context)) - - @defer.inlineCallbacks - def send_nonmember_event(self, requester, event, context, ratelimit=True): - """ - Persists and notifies local clients and federation of an event. - - Args: - event (FrozenEvent) the event to send. - context (Context) the context of the event. - ratelimit (bool): Whether to rate limit this send. - is_guest (bool): Whether the sender is a guest. - """ - if event.type == EventTypes.Member: - raise SynapseError( - 500, - "Tried to send member event through non-member codepath" - ) - - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. We check again just before we actually - # send the event. - yield self.ratelimit(requester, update=False) - - user = UserID.from_string(event.sender) - - assert self.hs.is_mine(user), "User must be our own: %s" % (user,) - - if event.is_state(): - prev_state = yield self.deduplicate_state_event(event, context) - if prev_state is not None: - defer.returnValue(prev_state) - - yield self.handle_new_client_event( - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - ) - - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - - @defer.inlineCallbacks - def deduplicate_state_event(self, event, context): - """ - Checks whether event is in the latest resolved state in context. - - If so, returns the version of the event in context. - Otherwise, returns None. 
- """ - prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) - prev_event = yield self.store.get_event(prev_event_id, allow_none=True) - if not prev_event: - return - - if prev_event and event.user_id == prev_event.user_id: - prev_content = encode_canonical_json(prev_event.content) - next_content = encode_canonical_json(event.content) - if prev_content == next_content: - defer.returnValue(prev_event) - return - - @defer.inlineCallbacks - def create_and_send_nonmember_event( - self, - requester, - event_dict, - ratelimit=True, - txn_id=None - ): - """ - Creates an event, then sends it. - - See self.create_event and self.send_nonmember_event. - """ - event, context = yield self.create_event( - requester, - event_dict, - token_id=requester.access_token_id, - txn_id=txn_id - ) - - spam_error = self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, basestring): - spam_error = "Spam is not permitted here" - raise SynapseError( - 403, spam_error, Codes.FORBIDDEN - ) - - yield self.send_nonmember_event( - requester, - event, - context, - ratelimit=ratelimit, - ) - defer.returnValue(event) - @defer.inlineCallbacks def get_room_data(self, user_id=None, room_id=None, event_type=None, state_key="", is_guest=False): @@ -470,6 +298,194 @@ class MessageHandler(BaseHandler): for user_id, profile in users_with_profile.iteritems() }) + +class EventCreationHandler(object): + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self.clock = hs.get_clock() + self.validator = EventValidator() + self.profile_handler = hs.get_profile_handler() + self.event_builder_factory = hs.get_event_builder_factory() + self.server_name = hs.hostname + self.ratelimiter = hs.get_ratelimiter() + self.notifier = hs.get_notifier() + + # This is only used to get at ratelimit function, and maybe_kick_guest_users + self.base_handler = BaseHandler(hs) + + self.pusher_pool = hs.get_pusherpool() + + # We arbitrarily limit concurrent event creation for a room to 5. + # This is to stop us from diverging history *too* much. + self.limiter = Limiter(max_count=5) + + self.action_generator = hs.get_action_generator() + + self.spam_checker = hs.get_spam_checker() + + @defer.inlineCallbacks + def create_event(self, requester, event_dict, token_id=None, txn_id=None, + prev_event_ids=None): + """ + Given a dict from a client, create a new event. + + Creates an FrozenEvent object, filling out auth_events, prev_events, + etc. + + Adds display names to Join membership events. + + Args: + requester + event_dict (dict): An entire event + token_id (str) + txn_id (str) + prev_event_ids (list): The prev event ids to use when creating the event + + Returns: + Tuple of created event (FrozenEvent), Context + """ + builder = self.event_builder_factory.new(event_dict) + + with (yield self.limiter.queue(builder.room_id)): + self.validator.validate_new(builder) + + if builder.type == EventTypes.Member: + membership = builder.content.get("membership", None) + target = UserID.from_string(builder.state_key) + + if membership in {Membership.JOIN, Membership.INVITE}: + # If event doesn't include a display name, add one. 
+ profile = self.profile_handler + content = builder.content + + try: + if "displayname" not in content: + content["displayname"] = yield profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = yield profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information for %r: %s", + target, e + ) + + if token_id is not None: + builder.internal_metadata.token_id = token_id + + if txn_id is not None: + builder.internal_metadata.txn_id = txn_id + + event, context = yield self._create_new_client_event( + builder=builder, + requester=requester, + prev_event_ids=prev_event_ids, + ) + + defer.returnValue((event, context)) + + @defer.inlineCallbacks + def send_nonmember_event(self, requester, event, context, ratelimit=True): + """ + Persists and notifies local clients and federation of an event. + + Args: + event (FrozenEvent) the event to send. + context (Context) the context of the event. + ratelimit (bool): Whether to rate limit this send. + is_guest (bool): Whether the sender is a guest. + """ + if event.type == EventTypes.Member: + raise SynapseError( + 500, + "Tried to send member event through non-member codepath" + ) + + # We check here if we are currently being rate limited, so that we + # don't do unnecessary work. We check again just before we actually + # send the event. + yield self.base_handler.ratelimit(requester, update=False) + + user = UserID.from_string(event.sender) + + assert self.hs.is_mine(user), "User must be our own: %s" % (user,) + + if event.is_state(): + prev_state = yield self.deduplicate_state_event(event, context) + if prev_state is not None: + defer.returnValue(prev_state) + + yield self.handle_new_client_event( + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + ) + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(user) + + @defer.inlineCallbacks + def deduplicate_state_event(self, event, context): + """ + Checks whether event is in the latest resolved state in context. + + If so, returns the version of the event in context. + Otherwise, returns None. + """ + prev_event_id = context.prev_state_ids.get((event.type, event.state_key)) + prev_event = yield self.store.get_event(prev_event_id, allow_none=True) + if not prev_event: + return + + if prev_event and event.user_id == prev_event.user_id: + prev_content = encode_canonical_json(prev_event.content) + next_content = encode_canonical_json(event.content) + if prev_content == next_content: + defer.returnValue(prev_event) + return + + @defer.inlineCallbacks + def create_and_send_nonmember_event( + self, + requester, + event_dict, + ratelimit=True, + txn_id=None + ): + """ + Creates an event, then sends it. + + See self.create_event and self.send_nonmember_event. 
+ """ + event, context = yield self.create_event( + requester, + event_dict, + token_id=requester.access_token_id, + txn_id=txn_id + ) + + spam_error = self.spam_checker.check_event_for_spam(event) + if spam_error: + if not isinstance(spam_error, basestring): + spam_error = "Spam is not permitted here" + raise SynapseError( + 403, spam_error, Codes.FORBIDDEN + ) + + yield self.send_nonmember_event( + requester, + event, + context, + ratelimit=ratelimit, + ) + defer.returnValue(event) + @measure_func("_create_new_client_event") @defer.inlineCallbacks def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): @@ -509,9 +525,7 @@ class MessageHandler(BaseHandler): builder.prev_events = prev_events builder.depth = depth - state_handler = self.state_handler - - context = yield state_handler.compute_event_context(builder) + context = yield self.state.compute_event_context(builder) if requester: context.app_service = requester.app_service @@ -551,7 +565,7 @@ class MessageHandler(BaseHandler): # We now need to go and hit out to wherever we need to hit out to. if ratelimit: - yield self.ratelimit(requester) + yield self.base_handler.ratelimit(requester) try: yield self.auth.check_from_context(event, context) @@ -567,7 +581,7 @@ class MessageHandler(BaseHandler): logger.exception("Failed to encode content: %r", event.content) raise - yield self.maybe_kick_guest_users(event, context) + yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: # Check the alias is acually valid (at this time at least) From 3fa362502cc6c509bac65753954c313d307035e6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:07 +0000 Subject: [PATCH 164/348] Update places where we create events --- synapse/handlers/directory.py | 7 +++---- synapse/handlers/federation.py | 18 ++++++++---------- synapse/handlers/room.py | 10 ++++------ synapse/handlers/room_member.py | 20 +++++++++++--------- synapse/rest/client/v1/admin.py | 4 ++-- synapse/rest/client/v1/room.py | 16 +++++++++------- synapse/server.py | 5 +++++ 7 files changed, 42 insertions(+), 38 deletions(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index a0464ae5c0..8580ada60a 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -34,6 +34,7 @@ class DirectoryHandler(BaseHandler): self.state = hs.get_state_handler() self.appservice_handler = hs.get_application_service_handler() + self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() self.federation.register_query_handler( @@ -249,8 +250,7 @@ class DirectoryHandler(BaseHandler): def send_room_alias_update_event(self, requester, user_id, room_id): aliases = yield self.store.get_aliases_for_room(room_id) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Aliases, @@ -272,8 +272,7 @@ class DirectoryHandler(BaseHandler): if not alias_event or alias_event.content.get("alias", "") != alias_str: return - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.CanonicalAlias, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9b..e6b9f5cf53 100644 --- a/synapse/handlers/federation.py +++ 
b/synapse/handlers/federation.py @@ -75,6 +75,7 @@ class FederationHandler(BaseHandler): self.is_mine_id = hs.is_mine_id self.pusher_pool = hs.get_pusherpool() self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() self.replication_layer.set_handler(self) @@ -1007,8 +1008,7 @@ class FederationHandler(BaseHandler): }) try: - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,8 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2120,8 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder ) @@ -2159,8 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder=builder, ) @@ -2210,8 +2207,9 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - message_handler = self.hs.get_handlers().message_handler - event, context = yield message_handler._create_new_client_event(builder=builder) + event, context = yield self.event_creation_handler._create_new_client_event( + builder=builder, + ) defer.returnValue((event, context)) @defer.inlineCallbacks diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index d1cc87a016..4ea5bf1bcf 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -64,6 +64,7 @@ class RoomCreationHandler(BaseHandler): super(RoomCreationHandler, self).__init__(hs) self.spam_checker = hs.get_spam_checker() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def create_room(self, requester, config, ratelimit=True): @@ -163,13 +164,11 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - msg_handler = self.hs.get_handlers().message_handler room_member_handler = self.hs.get_handlers().room_member_handler yield self._send_events_for_new_room( requester, room_id, - msg_handler, room_member_handler, preset_config=preset_config, invite_list=invite_list, @@ -181,7 +180,7 @@ class RoomCreationHandler(BaseHandler): if "name" in config: name = config["name"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Name, @@ -194,7 +193,7 @@ class RoomCreationHandler(BaseHandler): if "topic" in config: topic = config["topic"] - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Topic, @@ -249,7 +248,6 
@@ class RoomCreationHandler(BaseHandler): self, creator, # A Requester object. room_id, - msg_handler, room_member_handler, preset_config, invite_list, @@ -272,7 +270,7 @@ class RoomCreationHandler(BaseHandler): @defer.inlineCallbacks def send(etype, content, **kwargs): event = create(etype, content, **kwargs) - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7e6467cd1d..ab58beb0f5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -46,6 +46,7 @@ class RoomMemberHandler(BaseHandler): super(RoomMemberHandler, self).__init__(hs) self.profile_handler = hs.get_profile_handler() + self.event_creation_hander = hs.get_event_creation_handler() self.member_linearizer = Linearizer(name="member") @@ -66,13 +67,12 @@ class RoomMemberHandler(BaseHandler): ): if content is None: content = {} - msg_handler = self.hs.get_handlers().message_handler content["membership"] = membership if requester.is_guest: content["kind"] = "guest" - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, { "type": EventTypes.Member, @@ -90,12 +90,14 @@ class RoomMemberHandler(BaseHandler): ) # Check if this event matches the previous membership event for the user. - duplicate = yield msg_handler.deduplicate_state_event(event, context) + duplicate = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if duplicate is not None: # Discard the new event since this membership change is a no-op. defer.returnValue(duplicate) - yield msg_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -394,8 +396,9 @@ class RoomMemberHandler(BaseHandler): else: requester = synapse.types.create_requester(target_user) - message_handler = self.hs.get_handlers().message_handler - prev_event = yield message_handler.deduplicate_state_event(event, context) + prev_event = yield self.event_creation_hander.deduplicate_state_event( + event, context, + ) if prev_event is not None: return @@ -412,7 +415,7 @@ class RoomMemberHandler(BaseHandler): if is_blocked: raise SynapseError(403, "This room has been blocked on this server") - yield message_handler.handle_new_client_event( + yield self.event_creation_hander.handle_new_client_event( requester, event, context, @@ -644,8 +647,7 @@ class RoomMemberHandler(BaseHandler): ) ) - msg_handler = self.hs.get_handlers().message_handler - yield msg_handler.create_and_send_nonmember_event( + yield self.event_creation_hander.create_and_send_nonmember_event( requester, { "type": EventTypes.ThirdPartyInvite, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 0615e5d807..f77f646670 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -171,6 +171,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.store = hs.get_datastore() self.handlers = hs.get_handlers() self.state = hs.get_state_handler() + self.event_creation_handler = hs.get_event_creation_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -203,8 +204,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - msg_handler = self.handlers.message_handler - yield msg_handler.create_and_send_nonmember_event( + yield 
self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { "type": "m.room.message", diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 867ec8602c..ad6534537a 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -82,6 +82,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -162,15 +163,16 @@ class RoomStateEventRestServlet(ClientV1RestServlet): content=content, ) else: - msg_handler = self.handlers.message_handler - event, context = yield msg_handler.create_event( + event, context = yield self.event_creation_hander.create_event( requester, event_dict, token_id=requester.access_token_id, txn_id=txn_id, ) - yield msg_handler.send_nonmember_event(requester, event, context) + yield self.event_creation_hander.send_nonmember_event( + requester, event, context, + ) ret = {} if event: @@ -184,6 +186,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -205,8 +208,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): if 'ts' in request.args and requester.app_service: event_dict['origin_server_ts'] = parse_integer(request, "ts", 0) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_hander.create_and_send_nonmember_event( requester, event_dict, txn_id=txn_id, @@ -670,6 +672,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomRedactEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.event_creation_handler = hs.get_event_creation_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/redact/(?P[^/]*)") @@ -680,8 +683,7 @@ class RoomRedactEventRestServlet(ClientV1RestServlet): requester = yield self.auth.get_user_by_req(request) content = parse_json_object_from_request(request) - msg_handler = self.handlers.message_handler - event = yield msg_handler.create_and_send_nonmember_event( + event = yield self.event_creation_handler.create_and_send_nonmember_event( requester, { "type": EventTypes.Redaction, diff --git a/synapse/server.py b/synapse/server.py index 3173aed1d0..fbd602d40e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -55,6 +55,7 @@ from synapse.handlers.read_marker import ReadMarkerHandler from synapse.handlers.user_directory import UserDirectoryHandler from synapse.handlers.groups_local import GroupsLocalHandler from synapse.handlers.profile import ProfileHandler +from synapse.handlers.message import EventCreationHandler from synapse.groups.groups_server import GroupsServerHandler from synapse.groups.attestations import GroupAttestionRenewer, GroupAttestationSigning from synapse.http.client import SimpleHttpClient, InsecureInterceptableContextFactory @@ -118,6 +119,7 @@ class HomeServer(object): 'application_service_handler', 'device_message_handler', 'profile_handler', + 'event_creation_handler', 'deactivate_account_handler', 'set_password_handler', 'notifier', @@ -276,6 +278,9 @@ class HomeServer(object): def 
build_profile_handler(self): return ProfileHandler(self) + def build_event_creation_handler(self): + return EventCreationHandler(self) + def build_deactivate_account_handler(self): return DeactivateAccountHandler(self) From 25c0a020f430de2781041c8104a5cf92321a60a2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 15 Jan 2018 16:52:12 +0000 Subject: [PATCH 165/348] Updates tests --- tests/storage/test_redaction.py | 9 ++++----- tests/storage/test_roommember.py | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 6afaca3a61..de6d7904e3 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -36,8 +36,7 @@ class RedactionTestCase(unittest.TestCase): self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -59,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -79,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) @@ -98,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 1be7d932f6..4aff38bd58 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -37,8 +37,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): # storage logic self.store = hs.get_datastore() self.event_builder_factory = hs.get_event_builder_factory() - self.handlers = hs.get_handlers() - self.message_handler = self.handlers.message_handler + self.event_creation_handler = hs.get_event_creation_handler() self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") @@ -58,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield self.message_handler._create_new_client_event( + event, context = yield self.event_creation_handler._create_new_client_event( builder ) From 3c7b480ba33c68bfc4e98de57b6874c32011c8f4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:07:13 +0000 Subject: [PATCH 166/348] Factor out common code for search insert we can reuse the same code as is used for event insert, for doing the background index population. --- synapse/storage/search.py | 95 +++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 4f38a587c8..f1ac9ba0fd 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -13,19 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import namedtuple +import logging +import re +import ujson as json + from twisted.internet import defer from .background_updates import BackgroundUpdateStore from synapse.api.errors import SynapseError from synapse.storage.engines import PostgresEngine, Sqlite3Engine -import logging -import re -import ujson as json - logger = logging.getLogger(__name__) +SearchEntry = namedtuple('SearchEntry', [ + 'key', 'value', 'event_id', 'room_id', 'stream_ordering', + 'origin_server_ts', +]) + class SearchStore(BackgroundUpdateStore): @@ -49,16 +55,17 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_search(self, progress, batch_size): + # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) - INSERT_CLUMP_SIZE = 1000 TYPES = ["m.room.name", "m.room.message", "m.room.topic"] def reindex_search_txn(txn): sql = ( - "SELECT stream_ordering, event_id, room_id, type, content FROM events" + "SELECT stream_ordering, event_id, room_id, type, content, " + " origin_server_ts FROM events" " WHERE ? <= stream_ordering AND stream_ordering < ?" " AND (%s)" " ORDER BY stream_ordering DESC" @@ -67,6 +74,10 @@ class SearchStore(BackgroundUpdateStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) + # we could stream straight from the results into + # store_search_entries_txn with a generator function, but that + # would mean having two cursors open on the database at once. + # Instead we just build a list of results. rows = self.cursor_to_dict(txn) if not rows: return 0 @@ -79,6 +90,8 @@ class SearchStore(BackgroundUpdateStore): event_id = row["event_id"] room_id = row["room_id"] etype = row["type"] + stream_ordering = row["stream_ordering"] + origin_server_ts = row["origin_server_ts"] try: content = json.loads(row["content"]) except Exception: @@ -93,6 +106,8 @@ class SearchStore(BackgroundUpdateStore): elif etype == "m.room.name": key = "content.name" value = content["name"] + else: + raise Exception("unexpected event type %s" % etype) except (KeyError, AttributeError): # If the event is missing a necessary field then # skip over it. @@ -103,25 +118,16 @@ class SearchStore(BackgroundUpdateStore): # then skip over it continue - event_search_rows.append((event_id, room_id, key, value)) + event_search_rows.append(SearchEntry( + key=key, + value=value, + event_id=event_id, + room_id=room_id, + stream_ordering=stream_ordering, + origin_server_ts=origin_server_ts, + )) - if isinstance(self.database_engine, PostgresEngine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, vector)" - " VALUES (?,?,?,to_tsvector('english', ?))" - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = ( - "INSERT INTO event_search (event_id, room_id, key, value)" - " VALUES (?,?,?,?)" - ) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - - for index in range(0, len(event_search_rows), INSERT_CLUMP_SIZE): - clump = event_search_rows[index:index + INSERT_CLUMP_SIZE] - txn.executemany(sql, clump) + self.store_search_entries_txn(txn, event_search_rows) progress = { "target_min_stream_id_inclusive": target_min_stream_id, @@ -251,26 +257,49 @@ class SearchStore(BackgroundUpdateStore): key (str): value (str): """ + self.store_search_entries_txn( + txn, + (SearchEntry( + key=key, + value=value, + event_id=event.event_id, + room_id=event.room_id, + stream_ordering=event.internal_metadata.stream_ordering, + origin_server_ts=event.origin_server_ts, + ),), + ) + + def store_search_entries_txn(self, txn, entries): + """Add entries to the search table + + Args: + txn (cursor): + entries (iterable[SearchEntry]): + entries to be added to the table + """ if isinstance(self.database_engine, PostgresEngine): sql = ( "INSERT INTO event_search" " (event_id, room_id, key, vector, stream_ordering, origin_server_ts)" " VALUES (?,?,?,to_tsvector('english', ?),?,?)" ) - txn.execute( - sql, - ( - event.event_id, event.room_id, key, value, - event.internal_metadata.stream_ordering, - event.origin_server_ts, - ) - ) + + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + entry.stream_ordering, entry.origin_server_ts, + ) for entry in entries) + + txn.executemany(sql, args) elif isinstance(self.database_engine, Sqlite3Engine): sql = ( "INSERT INTO event_search (event_id, room_id, key, value)" " VALUES (?,?,?,?)" ) - txn.execute(sql, (event.event_id, event.room_id, key, value,)) + args = (( + entry.event_id, entry.room_id, entry.key, entry.value, + ) for entry in entries) + + txn.executemany(sql, args) else: # This should be unreachable. raise Exception("Unrecognized database engine") From ee6fb4cf8560534a9acc61b075c09dceeca83e85 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 20 Jan 2018 00:23:36 +0000 Subject: [PATCH 167/348] Remove redundant return value from _calculate_state_delta we already have the state from _get_new_state_after_events, so returning it from _calculate_state_delta is just confusing. --- synapse/storage/events.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index dd28c2efe3..2fead9eb0f 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -342,8 +342,20 @@ class EventsStore(SQLBaseStore): # NB: Assumes that we are only persisting events for one room # at a time. + + # map room_id->list[event_ids] giving the new forward + # extremities in each room new_forward_extremeties = {} + + # map room_id->(type,state_key)->event_id tracking the full + # state in each room after adding these events current_state_for_room = {} + + # map room_id->(to_delete, to_insert) where each entry is + # a map (type,key)->event_id giving the state delta in each + # room + state_delta_for_room = {} + if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): # Work out the new "current state" for each room. 
@@ -393,11 +405,12 @@ class EventsStore(SQLBaseStore): ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: + current_state_for_room[room_id] = current_state delta = yield self._calculate_state_delta( room_id, current_state, ) if delta is not None: - current_state_for_room[room_id] = delta + state_delta_for_room[room_id] = delta yield self.runInteraction( "persist_events", @@ -405,7 +418,7 @@ class EventsStore(SQLBaseStore): events_and_contexts=chunk, backfilled=backfilled, delete_existing=delete_existing, - current_state_for_room=current_state_for_room, + state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) @@ -422,7 +435,7 @@ class EventsStore(SQLBaseStore): event_counter.inc(event.type, origin_type, origin_entity) - for room_id, (_, _, new_state) in current_state_for_room.iteritems(): + for room_id, new_state in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( (room_id, ), new_state ) @@ -586,10 +599,10 @@ class EventsStore(SQLBaseStore): Assumes that we are only persisting events for one room at a time. Returns: - 3-tuple (to_delete, to_insert, new_state) where both are state dicts, + 2-tuple (to_delete, to_insert) where both are state dicts, i.e. (type, state_key) -> event_id. `to_delete` are the entries to first be deleted from current_state_events, `to_insert` are entries - to insert. `new_state` is the full set of state. + to insert. """ existing_state = yield self.get_current_state_ids(room_id) @@ -610,7 +623,7 @@ class EventsStore(SQLBaseStore): if ev_id in events_to_insert } - defer.returnValue((to_delete, to_insert, current_state)) + defer.returnValue((to_delete, to_insert)) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -670,7 +683,7 @@ class EventsStore(SQLBaseStore): @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, - delete_existing=False, current_state_for_room={}, + delete_existing=False, state_delta_for_room={}, new_forward_extremeties={}): """Insert some number of room events into the necessary database tables. @@ -686,7 +699,7 @@ class EventsStore(SQLBaseStore): delete_existing (bool): True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - current_state_for_room (dict[str, (list[str], list[str])]): + state_delta_for_room (dict[str, (list[str], list[str])]): The current-state delta for each room. 
For each room, a tuple (to_delete, to_insert), being a list of event ids to be removed from the current state, and a list of event ids to be added to @@ -698,7 +711,7 @@ class EventsStore(SQLBaseStore): """ max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering - self._update_current_state_txn(txn, current_state_for_room, max_stream_order) + self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) self._update_forward_extremities_txn( txn, @@ -764,7 +777,7 @@ class EventsStore(SQLBaseStore): def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order): for room_id, current_state_tuple in state_delta_by_room.iteritems(): - to_delete, to_insert, _ = current_state_tuple + to_delete, to_insert = current_state_tuple txn.executemany( "DELETE FROM current_state_events WHERE event_id = ?", [(ev_id,) for ev_id in to_delete.itervalues()], From 447aed42d22d3ece245c69f397d348b3a5b7bfa8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 15:40:41 +0000 Subject: [PATCH 168/348] Add event_map param to resolve_state_groups --- synapse/state.py | 34 ++++++++++++++++++++++++++++++---- synapse/storage/events.py | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/synapse/state.py b/synapse/state.py index 273f9911ca..6c2aaa5e7a 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -308,7 +308,7 @@ class StateHandler(object): )) result = yield self._state_resolution_handler.resolve_state_groups( - room_id, state_groups_ids, self._state_map_factory, + room_id, state_groups_ids, None, self._state_map_factory, ) defer.returnValue(result) @@ -371,7 +371,9 @@ class StateResolutionHandler(object): @defer.inlineCallbacks @log_function - def resolve_state_groups(self, room_id, state_groups_ids, state_map_factory): + def resolve_state_groups( + self, room_id, state_groups_ids, event_map, state_map_factory, + ): """Resolves conflicts between a set of state groups Always generates a new state group (unless we hit the cache), so should @@ -383,6 +385,14 @@ class StateResolutionHandler(object): map from state group id to the state in that state group (where 'state' is a map from state key to event id) + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. + + If None, all events will be fetched via state_map_factory. + Returns: Deferred[_StateCacheEntry]: resolved state """ @@ -423,6 +433,7 @@ class StateResolutionHandler(object): with Measure(self.clock, "state._resolve_events"): new_state = yield resolve_events_with_factory( state_groups_ids.values(), + event_map=event_map, state_map_factory=state_map_factory, ) else: @@ -555,11 +566,20 @@ def _seperate(state_sets): @defer.inlineCallbacks -def resolve_events_with_factory(state_sets, state_map_factory): +def resolve_events_with_factory(state_sets, event_map, state_map_factory): """ Args: state_sets(list): List of dicts of (type, state_key) -> event_id, which are the different state groups to resolve. + + event_map(dict[str,FrozenEvent]|None): + a dict from event_id to event, for any events that we happen to + have in flight (eg, those currently being persisted). This will be + used as a starting point fof finding the state we need; any missing + events will be requested via state_map_factory. 
+ + If None, all events will be fetched via state_map_factory. + state_map_factory(func): will be called with a list of event_ids that are needed, and should return with a Deferred of dict of event_id to event. @@ -580,12 +600,16 @@ def resolve_events_with_factory(state_sets, state_map_factory): for event_ids in conflicted_state.itervalues() for event_id in event_ids ) + if event_map is not None: + needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d conflicted events", len(needed_events)) # dict[str, FrozenEvent]: a map from state event id to event. Only includes - # the state events which are in conflict. + # the state events which are in conflict (and those in event_map) state_map = yield state_map_factory(needed_events) + if event_map is not None: + state_map.update(event_map) # get the ids of the auth events which allow us to authenticate the # conflicted state, picking only from the unconflicting state. @@ -597,6 +621,8 @@ def resolve_events_with_factory(state_sets, state_map_factory): new_needed_events = set(auth_events.itervalues()) new_needed_events -= needed_events + if event_map is not None: + new_needed_events -= set(event_map.iterkeys()) logger.info("Asking for %d auth events", len(new_needed_events)) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0f..7b912ad413 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -586,6 +586,7 @@ class EventsStore(SQLBaseStore): current_state = yield resolve_events_with_factory( state_sets, + event_map={}, state_map_factory=get_events, ) defer.returnValue(current_state) From 9fcbbe8e7d7557fef7fe03533166b376d6fa82ef Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 27 Jan 2018 09:49:15 +0000 Subject: [PATCH 169/348] Check that events being persisted have state_group --- synapse/storage/events.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7b912ad413..9bceded7ba 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -515,16 +515,21 @@ class EventsStore(SQLBaseStore): if ctx.current_state_ids is None: raise Exception("Unknown current state") + if ctx.state_group is None: + # I don't think this can happen, but let's double-check + raise Exception( + "Context for new extremity event %s has no state " + "group" % event_id, + ) + # If we've already seen the state group don't bother adding # it to the state sets again if ctx.state_group not in state_groups: state_sets.append(ctx.current_state_ids) if ctx.delta_ids or hasattr(ev, "state_key"): was_updated = True - if ctx.state_group: - # Add this as a seen state group (if it has a state - # group) - state_groups.add(ctx.state_group) + # Add this as a seen state group + state_groups.add(ctx.state_group) break else: # If we couldn't find it, then we'll need to pull From 225dc3b4cb8875fff52180d2f3b1e386dec26f4d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 10:17:55 +0000 Subject: [PATCH 170/348] Flatten _get_new_state_after_events rejig the if statements to simplify the logic and reduce indentation --- synapse/storage/events.py | 92 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 9bceded7ba..1b5dffe1c7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -503,6 +503,10 @@ class EventsStore(SQLBaseStore): None if there are no changes to the room 
state, or a dict of (type, state_key) -> event_id]. """ + + if not new_latest_event_ids: + defer.returnValue({}) + state_sets = [] state_groups = set() missing_event_ids = [] @@ -537,6 +541,9 @@ class EventsStore(SQLBaseStore): was_updated = True missing_event_ids.append(event_id) + if not was_updated: + return + if missing_event_ids: # Now pull out the state for any missing events from DB event_to_groups = yield self._get_state_group_for_events( @@ -549,54 +556,49 @@ class EventsStore(SQLBaseStore): group_to_state = yield self._get_state_for_groups(groups) state_sets.extend(group_to_state.itervalues()) - if not new_latest_event_ids: - defer.returnValue({}) - elif was_updated: - if len(state_sets) == 1: - # If there is only one state set, then we know what the current - # state is. - defer.returnValue(state_sets[0]) - else: - # We work out the current state by passing the state sets to the - # state resolution algorithm. It may ask for some events, including - # the events we have yet to persist, so we need a slightly more - # complicated event lookup function than simply looking the events - # up in the db. + if len(state_sets) == 1: + # If there is only one state set, then we know what the current + # state is. + defer.returnValue(state_sets[0]) - logger.info( - "Resolving state with %i state sets", len(state_sets), + # We work out the current state by passing the state sets to the + # state resolution algorithm. It may ask for some events, including + # the events we have yet to persist, so we need a slightly more + # complicated event lookup function than simply looking the events + # up in the db. + + logger.info( + "Resolving state with %i state sets", len(state_sets), + ) + + events_map = {ev.event_id: ev for ev, _ in events_context} + + @defer.inlineCallbacks + def get_events(ev_ids): + # We get the events by first looking at the list of events we + # are trying to persist, and then fetching the rest from the DB. + db = [] + to_return = {} + for ev_id in ev_ids: + ev = events_map.get(ev_id, None) + if ev: + to_return[ev_id] = ev + else: + db.append(ev_id) + + if db: + evs = yield self.get_events( + ev_ids, get_prev_content=False, check_redacted=False, ) + to_return.update(evs) + defer.returnValue(to_return) - events_map = {ev.event_id: ev for ev, _ in events_context} - - @defer.inlineCallbacks - def get_events(ev_ids): - # We get the events by first looking at the list of events we - # are trying to persist, and then fetching the rest from the DB. - db = [] - to_return = {} - for ev_id in ev_ids: - ev = events_map.get(ev_id, None) - if ev: - to_return[ev_id] = ev - else: - db.append(ev_id) - - if db: - evs = yield self.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - to_return.update(evs) - defer.returnValue(to_return) - - current_state = yield resolve_events_with_factory( - state_sets, - event_map={}, - state_map_factory=get_events, - ) - defer.returnValue(current_state) - else: - return + current_state = yield resolve_events_with_factory( + state_sets, + event_map={}, + state_map_factory=get_events, + ) + defer.returnValue(current_state) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): From ebfe64e3d69d0047ee9902a05beaf0249f11e072 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 30 Jan 2018 11:06:15 +0000 Subject: [PATCH 171/348] Use StateResolutionHandler to resolve state in persist events ... and thus benefit (hopefully) from its cache. 
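At its core, the change below replaces the open-coded call to resolve_events_with_factory with the shared resolver. The key lines of the new code path, condensed from the hunk that follows (illustrative excerpt, not a complete function):

# _get_new_state_after_events now hands resolution to the shared
# StateResolutionHandler, passing the in-flight events so they are not
# re-fetched from the database.
events_map = {ev.event_id: ev for ev, _ in events_context}
res = yield self._state_resolution_handler.resolve_state_groups(
    room_id, state_groups, events_map, get_events
)
defer.returnValue(res.state)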
--- synapse/storage/events.py | 72 +++++++++++++-------------------------- 1 file changed, 24 insertions(+), 48 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 1b5dffe1c7..ca1d4a3986 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -27,7 +27,6 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.state import resolve_events_with_factory from synapse.util.caches.descriptors import cached from synapse.types import get_domain_from_id @@ -237,6 +236,8 @@ class EventsStore(SQLBaseStore): self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() + def persist_events(self, events_and_contexts, backfilled=False): """ Write events to the database @@ -402,6 +403,7 @@ class EventsStore(SQLBaseStore): "Calculating state delta for room %s", room_id, ) current_state = yield self._get_new_state_after_events( + room_id, ev_ctx_rm, new_latest_event_ids, ) if current_state is not None: @@ -487,11 +489,14 @@ class EventsStore(SQLBaseStore): defer.returnValue(new_latest_event_ids) @defer.inlineCallbacks - def _get_new_state_after_events(self, events_context, new_latest_event_ids): + def _get_new_state_after_events(self, room_id, events_context, new_latest_event_ids): """Calculate the current state dict after adding some new events to a room Args: + room_id (str): + room to which the events are being added. Used for logging etc + events_context (list[(EventBase, EventContext)]): events and contexts which are being added to the room @@ -507,8 +512,8 @@ class EventsStore(SQLBaseStore): if not new_latest_event_ids: defer.returnValue({}) - state_sets = [] - state_groups = set() + # map from state_group to ((type, key) -> event_id) state map + state_groups = {} missing_event_ids = [] was_updated = False for event_id in new_latest_event_ids: @@ -529,11 +534,9 @@ class EventsStore(SQLBaseStore): # If we've already seen the state group don't bother adding # it to the state sets again if ctx.state_group not in state_groups: - state_sets.append(ctx.current_state_ids) + state_groups[ctx.state_group] = ctx.current_state_ids if ctx.delta_ids or hasattr(ev, "state_key"): was_updated = True - # Add this as a seen state group - state_groups.add(ctx.state_group) break else: # If we couldn't find it, then we'll need to pull @@ -550,55 +553,28 @@ class EventsStore(SQLBaseStore): missing_event_ids, ) - groups = set(event_to_groups.itervalues()) - state_groups + groups = set(event_to_groups.itervalues()) - set(state_groups.iterkeys()) if groups: group_to_state = yield self._get_state_for_groups(groups) - state_sets.extend(group_to_state.itervalues()) + state_groups.update(group_to_state) - if len(state_sets) == 1: - # If there is only one state set, then we know what the current + if len(state_groups) == 1: + # If there is only one state group, then we know what the current # state is. - defer.returnValue(state_sets[0]) + defer.returnValue(state_groups.values()[0]) - # We work out the current state by passing the state sets to the - # state resolution algorithm. It may ask for some events, including - # the events we have yet to persist, so we need a slightly more - # complicated event lookup function than simply looking the events - # up in the db. 
- - logger.info( - "Resolving state with %i state sets", len(state_sets), - ) - - events_map = {ev.event_id: ev for ev, _ in events_context} - - @defer.inlineCallbacks def get_events(ev_ids): - # We get the events by first looking at the list of events we - # are trying to persist, and then fetching the rest from the DB. - db = [] - to_return = {} - for ev_id in ev_ids: - ev = events_map.get(ev_id, None) - if ev: - to_return[ev_id] = ev - else: - db.append(ev_id) - - if db: - evs = yield self.get_events( - ev_ids, get_prev_content=False, check_redacted=False, - ) - to_return.update(evs) - defer.returnValue(to_return) - - current_state = yield resolve_events_with_factory( - state_sets, - event_map={}, - state_map_factory=get_events, + return self.get_events( + ev_ids, get_prev_content=False, check_redacted=False, + ) + events_map = {ev.event_id: ev for ev, _ in events_context} + logger.debug("calling resolve_state_groups from preserve_events") + res = yield self._state_resolution_handler.resolve_state_groups( + room_id, state_groups, events_map, get_events ) - defer.returnValue(current_state) + + defer.returnValue(res.state) @defer.inlineCallbacks def _calculate_state_delta(self, room_id, current_state): From 3d33eef6fcbba474664a9bccdcb8822c6f72ee8c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 14:31:24 +0000 Subject: [PATCH 172/348] Store state groups separately from events (#2784) * Split state group persist into seperate storage func * Add per database engine code for state group id gen * Move store_state_group to StateReadStore This allows other workers to use it, and so resolve state. * Hook up store_state_group * Fix tests * Rename _store_mult_state_groups_txn * Rename StateGroupReadStore * Remove redundant _have_persisted_state_group_txn * Update comments * Comment compute_event_context * Set start val for state_group_id_seq ... otherwise we try to recreate old state groups * Update comments * Don't store state for outliers * Update comment * Update docstring as state groups are ints --- synapse/events/snapshot.py | 4 +- synapse/handlers/federation.py | 24 +- synapse/replication/slave/storage/events.py | 4 +- synapse/state.py | 58 ++++- synapse/storage/__init__.py | 1 - synapse/storage/engines/postgres.py | 6 + synapse/storage/engines/sqlite3.py | 19 ++ synapse/storage/events.py | 10 +- .../schema/delta/47/state_group_seq.py | 37 +++ synapse/storage/state.py | 224 +++++++++--------- .../replication/slave/storage/test_events.py | 4 +- tests/test_state.py | 154 ++++++------ 12 files changed, 341 insertions(+), 204 deletions(-) create mode 100644 synapse/storage/schema/delta/47/state_group_seq.py diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index e9a732ff03..87e3fe7b97 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -25,7 +25,9 @@ class EventContext(object): The current state map excluding the current event. (type, state_key) -> event_id - state_group (int): state group id + state_group (int|None): state group id, if the state has been stored + as a state group. This is usually only None if e.g. the event is + an outlier. 
rejected (bool|str): A rejection reason if the event was rejected, else False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8ee9434c9b..643e813b1f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1831,8 +1831,8 @@ class FederationHandler(BaseHandler): current_state = set(e.event_id for e in auth_events.values()) different_auth = event_auth_events - current_state - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) if different_auth and not event.internal_metadata.is_outlier(): @@ -1913,8 +1913,8 @@ class FederationHandler(BaseHandler): # 4. Look at rejects and their proofs. # TODO. - self._update_context_for_auth_events( - context, auth_events, event_key, + yield self._update_context_for_auth_events( + event, context, auth_events, event_key, ) try: @@ -1923,11 +1923,15 @@ class FederationHandler(BaseHandler): logger.warn("Failed auth resolution for %r because %s", event, e) raise e - def _update_context_for_auth_events(self, context, auth_events, + @defer.inlineCallbacks + def _update_context_for_auth_events(self, event, context, auth_events, event_key): - """Update the state_ids in an event context after auth event resolution + """Update the state_ids in an event context after auth event resolution, + storing the changes as a new state group. Args: + event (Event): The event we're handling the context for + context (synapse.events.snapshot.EventContext): event context to be updated @@ -1950,7 +1954,13 @@ class FederationHandler(BaseHandler): context.prev_state_ids.update({ k: a.event_id for k, a in auth_events.iteritems() }) - context.state_group = self.store.get_next_state_group() + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) @defer.inlineCallbacks def construct_auth_difference(self, local_auth, remote_auth): diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 29d7296b43..8acb5df0f3 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore -from synapse.storage.state import StateGroupReadStore +from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore @@ -37,7 +37,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupReadStore, BaseSlavedStore): +class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) diff --git a/synapse/state.py b/synapse/state.py index 273f9911ca..cc93bbcb6b 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -183,8 +183,15 @@ class StateHandler(object): def compute_event_context(self, event, old_state=None): """Build an EventContext structure for the event. 
+ This works out what the current state should be for the event, and + generates a new state group if necessary. + Args: event (synapse.events.EventBase): + old_state (dict|None): The state at the event if it can't be + calculated from existing events. This is normally only specified + when receiving an event from federation where we don't have the + prev events for, e.g. when backfilling. Returns: synapse.events.snapshot.EventContext: """ @@ -208,15 +215,22 @@ class StateHandler(object): context.current_state_ids = {} context.prev_state_ids = {} context.prev_state_events = [] - context.state_group = self.store.get_next_state_group() + + # We don't store state for outliers, so we don't generate a state + # froup for it. + context.state_group = None + defer.returnValue(context) if old_state: + # We already have the state, so we don't need to calculate it. + # Let's just correctly fill out the context and create a + # new state group for it. + context = EventContext() context.prev_state_ids = { (s.type, s.state_key): s.event_id for s in old_state } - context.state_group = self.store.get_next_state_group() if event.is_state(): key = (event.type, event.state_key) @@ -229,6 +243,14 @@ class StateHandler(object): else: context.current_state_ids = context.prev_state_ids + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=None, + delta_ids=None, + current_state_ids=context.current_state_ids, + ) + context.prev_state_events = [] defer.returnValue(context) @@ -242,7 +264,8 @@ class StateHandler(object): context = EventContext() context.prev_state_ids = curr_state if event.is_state(): - context.state_group = self.store.get_next_state_group() + # If this is a state event then we need to create a new state + # group for the state after this event. key = (event.type, event.state_key) if key in context.prev_state_ids: @@ -253,24 +276,43 @@ class StateHandler(object): context.current_state_ids[key] = event.event_id if entry.state_group: + # If the state at the event has a state group assigned then + # we can use that as the prev group context.prev_group = entry.state_group context.delta_ids = { key: event.event_id } elif entry.prev_group: + # If the state at the event only has a prev group, then we can + # use that as a prev group too. 
context.prev_group = entry.prev_group context.delta_ids = dict(entry.delta_ids) context.delta_ids[key] = event.event_id - else: - if entry.state_group is None: - entry.state_group = self.store.get_next_state_group() - entry.state_id = entry.state_group - context.state_group = entry.state_group + context.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=context.prev_group, + delta_ids=context.delta_ids, + current_state_ids=context.current_state_ids, + ) + else: context.current_state_ids = context.prev_state_ids context.prev_group = entry.prev_group context.delta_ids = entry.delta_ids + if entry.state_group is None: + entry.state_group = yield self.store.store_state_group( + event.event_id, + event.room_id, + prev_group=entry.prev_group, + delta_ids=entry.delta_ids, + current_state_ids=context.current_state_ids, + ) + entry.state_id = entry.state_group + + context.state_group = entry.state_group + context.prev_state_events = [] defer.returnValue(context) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index d01d46338a..f8fbd02ceb 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -124,7 +124,6 @@ class DataStore(RoomMemberStore, RoomStore, ) self._transaction_id_gen = IdGenerator(db_conn, "sent_transactions", "id") - self._state_groups_id_gen = IdGenerator(db_conn, "state_groups", "id") self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._event_reports_id_gen = IdGenerator(db_conn, "event_reports", "id") self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index a6ae79dfad..8a0386c1a4 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -62,3 +62,9 @@ class PostgresEngine(object): def lock_table(self, txn, table): txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,)) + + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + txn.execute("SELECT nextval('state_group_id_seq')") + return txn.fetchone()[0] diff --git a/synapse/storage/engines/sqlite3.py b/synapse/storage/engines/sqlite3.py index 755c9a1f07..60f0fa7fb3 100644 --- a/synapse/storage/engines/sqlite3.py +++ b/synapse/storage/engines/sqlite3.py @@ -16,6 +16,7 @@ from synapse.storage.prepare_database import prepare_database import struct +import threading class Sqlite3Engine(object): @@ -24,6 +25,11 @@ class Sqlite3Engine(object): def __init__(self, database_module, database_config): self.module = database_module + # The current max state_group, or None if we haven't looked + # in the DB yet. + self._current_state_group_id = None + self._current_state_group_id_lock = threading.Lock() + def check_database(self, txn): pass @@ -43,6 +49,19 @@ class Sqlite3Engine(object): def lock_table(self, txn, table): return + def get_next_state_group_id(self, txn): + """Returns an int that can be used as a new state_group ID + """ + # We do application locking here since if we're using sqlite then + # we are a single process synapse. 
+ with self._current_state_group_id_lock: + if self._current_state_group_id is None: + txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") + self._current_state_group_id = txn.fetchone()[0] + + self._current_state_group_id += 1 + return self._current_state_group_id + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2fead9eb0f..af56f1ee57 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -755,9 +755,8 @@ class EventsStore(SQLBaseStore): events_and_contexts=events_and_contexts, ) - # Insert into the state_groups, state_groups_state, and - # event_to_state_groups tables. - self._store_mult_state_groups_txn(txn, events_and_contexts) + # Insert into event_to_state_groups. + self._store_event_state_mappings_txn(txn, events_and_contexts) # _store_rejected_events_txn filters out any events which were # rejected, and returns the filtered list. @@ -992,10 +991,9 @@ class EventsStore(SQLBaseStore): # an outlier in the database. We now have some state at that # so we need to update the state_groups table with that state. - # insert into the state_group, state_groups_state and - # event_to_state_groups tables. + # insert into event_to_state_groups. try: - self._store_mult_state_groups_txn(txn, ((event, context),)) + self._store_event_state_mappings_txn(txn, ((event, context),)) except Exception: logger.exception("") raise diff --git a/synapse/storage/schema/delta/47/state_group_seq.py b/synapse/storage/schema/delta/47/state_group_seq.py new file mode 100644 index 0000000000..f6766501d2 --- /dev/null +++ b/synapse/storage/schema/delta/47/state_group_seq.py @@ -0,0 +1,37 @@ +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.storage.engines import PostgresEngine + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + # if we already have some state groups, we want to start making new + # ones with a higher id. + cur.execute("SELECT max(id) FROM state_groups") + row = cur.fetchone() + + if row[0] is None: + start_val = 1 + else: + start_val = row[0] + 1 + + cur.execute( + "CREATE SEQUENCE state_group_id_seq START WITH %s", + (start_val, ), + ) + + +def run_upgrade(*args, **kwargs): + pass diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 360e3e4355..adb48df73e 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -42,11 +42,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt return len(self.delta_ids) if self.delta_ids else 0 -class StateGroupReadStore(SQLBaseStore): - """The read-only parts of StateGroupStore - - None of these functions write to the state tables, so are suitable for - including in the SlavedStores. +class StateGroupWorkerStore(SQLBaseStore): + """The parts of StateGroupStore that can be called from workers. 
""" STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" @@ -54,7 +51,7 @@ class StateGroupReadStore(SQLBaseStore): CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" def __init__(self, db_conn, hs): - super(StateGroupReadStore, self).__init__(db_conn, hs) + super(StateGroupWorkerStore, self).__init__(db_conn, hs) self._state_group_cache = DictionaryCache( "*stateGroupCache*", 100000 * CACHE_SIZE_FACTOR @@ -549,8 +546,117 @@ class StateGroupReadStore(SQLBaseStore): defer.returnValue(results) + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + """Store a new set of state, returning a newly assigned state group. -class StateStore(StateGroupReadStore, BackgroundUpdateStore): + Args: + event_id (str): The event ID for which the state was calculated + room_id (str) + prev_group (int|None): A previous state group for the room, optional. + delta_ids (dict|None): The delta between state at `prev_group` and + `current_state_ids`, if `prev_group` was given. Same format as + `current_state_ids`. + current_state_ids (dict): The state to store. Map of (type, state_key) + to event_id. + + Returns: + Deferred[int]: The state group ID + """ + def _store_state_group_txn(txn): + if current_state_ids is None: + # AFAIK, this can never happen + raise Exception("current_state_ids cannot be None") + + state_group = self.database_engine.get_next_state_group_id(txn) + + self._simple_insert_txn( + txn, + table="state_groups", + values={ + "id": state_group, + "room_id": room_id, + "event_id": event_id, + }, + ) + + # We persist as a delta if we can, while also ensuring the chain + # of deltas isn't tooo long, as otherwise read performance degrades. + if prev_group: + is_in_db = self._simple_select_one_onecol_txn( + txn, + table="state_groups", + keyvalues={"id": prev_group}, + retcol="id", + allow_none=True, + ) + if not is_in_db: + raise Exception( + "Trying to persist state with unpersisted prev_group: %r" + % (prev_group,) + ) + + potential_hops = self._count_state_group_hops_txn( + txn, prev_group + ) + if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + self._simple_insert_txn( + txn, + table="state_group_edges", + values={ + "state_group": state_group, + "prev_state_group": prev_group, + }, + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in delta_ids.iteritems() + ], + ) + else: + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": state_group, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in current_state_ids.iteritems() + ], + ) + + # Prefill the state group cache with this group. + # It's fine to use the sequence like this as the state group map + # is immutable. (If the map wasn't immutable then this prefill could + # race with another update) + txn.call_after( + self._state_group_cache.update, + self._state_group_cache.sequence, + key=state_group, + value=dict(current_state_ids), + full=True, + ) + + return state_group + + return self.runInteraction("store_state_group", _store_state_group_txn) + + +class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. This is done by the concept of `state groups`. 
Every event is a assigned @@ -591,27 +697,12 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): where_clause="type='m.room.member'", ) - def _have_persisted_state_group_txn(self, txn, state_group): - txn.execute( - "SELECT count(*) FROM state_groups WHERE id = ?", - (state_group,) - ) - row = txn.fetchone() - return row and row[0] - - def _store_mult_state_groups_txn(self, txn, events_and_contexts): + def _store_event_state_mappings_txn(self, txn, events_and_contexts): state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): continue - if context.current_state_ids is None: - # AFAIK, this can never happen - logger.error( - "Non-outlier event %s had current_state_ids==None", - event.event_id) - continue - # if the event was rejected, just give it the same state as its # predecessor. if context.rejected: @@ -620,90 +711,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): state_groups[event.event_id] = context.state_group - if self._have_persisted_state_group_txn(txn, context.state_group): - continue - - self._simple_insert_txn( - txn, - table="state_groups", - values={ - "id": context.state_group, - "room_id": event.room_id, - "event_id": event.event_id, - }, - ) - - # We persist as a delta if we can, while also ensuring the chain - # of deltas isn't tooo long, as otherwise read performance degrades. - if context.prev_group: - is_in_db = self._simple_select_one_onecol_txn( - txn, - table="state_groups", - keyvalues={"id": context.prev_group}, - retcol="id", - allow_none=True, - ) - if not is_in_db: - raise Exception( - "Trying to persist state with unpersisted prev_group: %r" - % (context.prev_group,) - ) - - potential_hops = self._count_state_group_hops_txn( - txn, context.prev_group - ) - if context.prev_group and potential_hops < MAX_STATE_DELTA_HOPS: - self._simple_insert_txn( - txn, - table="state_group_edges", - values={ - "state_group": context.state_group, - "prev_state_group": context.prev_group, - }, - ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.delta_ids.iteritems() - ], - ) - else: - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": context.state_group, - "room_id": event.room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in context.current_state_ids.iteritems() - ], - ) - - # Prefill the state group cache with this group. - # It's fine to use the sequence like this as the state group map - # is immutable. 
(If the map wasn't immutable then this prefill could - # race with another update) - txn.call_after( - self._state_group_cache.update, - self._state_group_cache.sequence, - key=context.state_group, - value=dict(context.current_state_ids), - full=True, - ) - self._simple_insert_many_txn( txn, table="event_to_state_groups", @@ -763,9 +770,6 @@ class StateStore(StateGroupReadStore, BackgroundUpdateStore): return count - def get_next_state_group(self): - return self._state_groups_id_gen.get_next() - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 105e1228bb..f430cce931 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -226,11 +226,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): context = EventContext() context.current_state_ids = state_ids context.prev_state_ids = state_ids - elif not backfill: + else: state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - else: - context = EventContext() context.push_actions = push_actions diff --git a/tests/test_state.py b/tests/test_state.py index d16e1b3b8b..a5c5e55951 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -80,14 +80,14 @@ class StateGroupStore(object): return defer.succeed(groups) - def store_state_groups(self, event, context): - if context.current_state_ids is None: - return + def store_state_group(self, event_id, room_id, prev_group, delta_ids, + current_state_ids): + state_group = self._next_group + self._next_group += 1 - state_events = dict(context.current_state_ids) + self._group_to_state[state_group] = dict(current_state_ids) - self._group_to_state[context.state_group] = state_events - self._event_to_state_group[event.event_id] = context.state_group + return state_group def get_events(self, event_ids, **kwargs): return { @@ -95,10 +95,19 @@ class StateGroupStore(object): if e_id in self._event_id_to_event } + def get_state_group_delta(self, name): + return (None, None) + def register_events(self, events): for e in events: self._event_id_to_event[e.event_id] = e + def register_event_context(self, event, context): + self._event_to_state_group[event.event_id] = context.state_group + + def register_event_id_state_group(self, event_id, state_group): + self._event_to_state_group[event_id] = state_group + class DictObj(dict): def __init__(self, **kwargs): @@ -137,15 +146,7 @@ class Graph(object): class StateTestCase(unittest.TestCase): def setUp(self): - self.store = Mock( - spec_set=[ - "get_state_groups_ids", - "add_event_hashes", - "get_events", - "get_next_state_group", - "get_state_group_delta", - ] - ) + self.store = StateGroupStore() hs = Mock(spec_set=[ "get_datastore", "get_auth", "get_state_handler", "get_clock", "get_state_resolution_handler", @@ -156,9 +157,6 @@ class StateTestCase(unittest.TestCase): hs.get_auth.return_value = Auth(hs) hs.get_state_resolution_handler = lambda: StateResolutionHandler(hs) - self.store.get_next_state_group.side_effect = Mock - self.store.get_state_group_delta.return_value = (None, None) - self.state = StateHandler(hs) self.event_id = 0 @@ -197,14 +195,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids + 
self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertEqual(2, len(context_store["D"].prev_state_ids)) @@ -249,16 +246,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -315,16 +309,13 @@ class StateTestCase(unittest.TestCase): } ) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -398,16 +389,13 @@ class StateTestCase(unittest.TestCase): self._add_depths(nodes, edges) graph = Graph(nodes, edges) - store = StateGroupStore() - self.store.get_state_groups_ids.side_effect = store.get_state_groups_ids - self.store.get_events = store.get_events - store.register_events(graph.walk()) + self.store.register_events(graph.walk()) context_store = {} for event in graph.walk(): context = yield self.state.compute_event_context(event) - store.store_state_groups(event, context) + self.store.register_event_context(event, context) context_store[event.event_id] = context self.assertSetEqual( @@ -467,7 +455,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_message(self): - event = create_event(type="test_message", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="test_message", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -475,11 +467,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: {(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -492,7 +484,11 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_trivial_annotate_state(self): - event = create_event(type="state", state_key="", name="event") + prev_event_id = "prev_event_id" + event = create_event( + type="state", state_key="", name="event2", + prev_events=[(prev_event_id, {})], + ) old_state = [ create_event(type="test1", state_key="1"), @@ -500,11 +496,11 @@ class StateTestCase(unittest.TestCase): create_event(type="test2", state_key=""), ] - group_name = "group_name_1" - - self.store.get_state_groups_ids.return_value = { - group_name: 
{(e.type, e.state_key): e.event_id for e in old_state}, - } + group_name = self.store.store_state_group( + prev_event_id, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state}, + ) + self.store.register_event_id_state_group(prev_event_id, group_name) context = yield self.state.compute_event_context(event) @@ -517,7 +513,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_message_conflict(self): - event = create_event(type="test_message", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test_message", name="event3", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -537,12 +538,12 @@ class StateTestCase(unittest.TestCase): create_event(type="test4", state_key=""), ] - store = StateGroupStore() - store.register_events(old_state_1) - store.register_events(old_state_2) - self.store.get_events = store.get_events + self.store.register_events(old_state_1) + self.store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -550,7 +551,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_resolve_state_conflict(self): - event = create_event(type="test4", state_key="", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", state_key="", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) creation = create_event( type=EventTypes.Create, state_key="" @@ -575,7 +581,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual(len(context.current_state_ids), 6) @@ -583,7 +591,12 @@ class StateTestCase(unittest.TestCase): @defer.inlineCallbacks def test_standard_depth_conflict(self): - event = create_event(type="test4", name="event") + prev_event_id1 = "event_id1" + prev_event_id2 = "event_id2" + event = create_event( + type="test4", name="event", + prev_events=[(prev_event_id1, {}), (prev_event_id2, {})], + ) member_event = create_event( type=EventTypes.Member, @@ -615,7 +628,9 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_2) self.store.get_events = store.get_events - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_2[2].event_id, context.current_state_ids[("test1", "1")] @@ -639,19 +654,26 @@ class StateTestCase(unittest.TestCase): store.register_events(old_state_1) store.register_events(old_state_2) - context = yield self._get_context(event, old_state_1, old_state_2) + context = yield self._get_context( + event, prev_event_id1, old_state_1, prev_event_id2, old_state_2, + ) self.assertEqual( old_state_1[2].event_id, context.current_state_ids[("test1", "1")] ) - def _get_context(self, event, old_state_1, old_state_2): - group_name_1 = "group_name_1" - group_name_2 = "group_name_2" + def _get_context(self, event, prev_event_id_1, old_state_1, prev_event_id_2, + 
old_state_2): + sg1 = self.store.store_state_group( + prev_event_id_1, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_1}, + ) + self.store.register_event_id_state_group(prev_event_id_1, sg1) - self.store.get_state_groups_ids.return_value = { - group_name_1: {(e.type, e.state_key): e.event_id for e in old_state_1}, - group_name_2: {(e.type, e.state_key): e.event_id for e in old_state_2}, - } + sg2 = self.store.store_state_group( + prev_event_id_2, event.room_id, None, None, + {(e.type, e.state_key): e.event_id for e in old_state_2}, + ) + self.store.register_event_id_state_group(prev_event_id_2, sg2) return self.state.compute_event_context(event) From 770b2252ca9b5a74c3e767bfe0c5e7bb2e84ebd3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:31:50 +0000 Subject: [PATCH 173/348] s/_create_new_client_event/create_new_client_event/ --- synapse/handlers/federation.py | 10 +++++----- synapse/handlers/message.py | 6 +++--- tests/storage/test_redaction.py | 6 +++--- tests/storage/test_roommember.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e6b9f5cf53..06d6c8425b 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1008,7 +1008,7 @@ class FederationHandler(BaseHandler): }) try: - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) except AuthError as e: @@ -1248,7 +1248,7 @@ class FederationHandler(BaseHandler): "state_key": user_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2119,7 +2119,7 @@ class FederationHandler(BaseHandler): if (yield self.auth.check_host_in_room(room_id, self.hs.hostname)): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder ) @@ -2157,7 +2157,7 @@ class FederationHandler(BaseHandler): """ builder = self.event_builder_factory.new(event_dict) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) @@ -2207,7 +2207,7 @@ class FederationHandler(BaseHandler): builder = self.event_builder_factory.new(event_dict) EventValidator().validate_new(builder) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder=builder, ) defer.returnValue((event, context)) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index afa19bf653..e8e6a89a3c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -378,7 +378,7 @@ class EventCreationHandler(object): if txn_id is not None: builder.internal_metadata.txn_id = txn_id - event, context = yield self._create_new_client_event( + event, context = yield self.create_new_client_event( builder=builder, requester=requester, prev_event_ids=prev_event_ids, @@ -486,9 +486,9 @@ class EventCreationHandler(object): ) defer.returnValue(event) - @measure_func("_create_new_client_event") + @measure_func("create_new_client_event") @defer.inlineCallbacks 
- def _create_new_client_event(self, builder, requester=None, prev_event_ids=None): + def create_new_client_event(self, builder, requester=None, prev_event_ids=None): if prev_event_ids: prev_events = yield self.store.add_event_hashes(prev_event_ids) prev_max_depth = yield self.store.get_max_depth_of_events(prev_event_ids) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index de6d7904e3..888ddfaddd 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -58,7 +58,7 @@ class RedactionTestCase(unittest.TestCase): "content": content, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -78,7 +78,7 @@ class RedactionTestCase(unittest.TestCase): "content": {"body": body, "msgtype": u"message"}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) @@ -97,7 +97,7 @@ class RedactionTestCase(unittest.TestCase): "redacts": event_id, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 4aff38bd58..657b279e5d 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -57,7 +57,7 @@ class RoomMemberStoreTestCase(unittest.TestCase): "content": {"membership": membership}, }) - event, context = yield self.event_creation_handler._create_new_client_event( + event, context = yield self.event_creation_handler.create_new_client_event( builder ) From 3e1e69ccafbfdf8aa7c0cd06bc4eaf948a6bafdf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 16:40:38 +0000 Subject: [PATCH 174/348] Update copyright --- synapse/handlers/federation.py | 1 + synapse/handlers/message.py | 2 +- synapse/handlers/room.py | 1 + synapse/handlers/room_member.py | 1 + synapse/rest/client/v1/admin.py | 1 + synapse/rest/client/v1/room.py | 1 + 6 files changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 06d6c8425b..cba96111d1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e8e6a89a3c..1540721815 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd +# Copyright 2017 - 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 4ea5bf1bcf..6ab020bf41 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014 - 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ab58beb0f5..37dc5e99ab 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index f77f646670..20c5c66632 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index ad6534537a..fbb2fc36e4 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e3624fad5f0dfd3cffcbb7c996a8d29bb2c79dbd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 10:30:25 +0000 Subject: [PATCH 175/348] Remove pointless ratelimit check The intention was for the check to be called as early as possible in the request, but actually was called just before the main ratelimit check, so was fairly pointless. --- synapse/handlers/message.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1540721815..a58fc37fff 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -403,11 +403,6 @@ class EventCreationHandler(object): "Tried to send member event through non-member codepath" ) - # We check here if we are currently being rate limited, so that we - # don't do unnecessary work. We check again just before we actually - # send the event. 
- yield self.base_handler.ratelimit(requester, update=False) - user = UserID.from_string(event.sender) assert self.hs.is_mine(user), "User must be our own: %s" % (user,) From 24dd73028ad7fc7a5109a5d97eef5a79179225d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Feb 2018 17:22:16 +0000 Subject: [PATCH 176/348] Add replication http endpoint for event sending --- synapse/app/homeserver.py | 4 + synapse/config/workers.py | 8 ++ synapse/events/snapshot.py | 72 +++++++++++++++++ synapse/handlers/message.py | 16 ++++ synapse/replication/http/__init__.py | 31 +++++++ synapse/replication/http/send_event.py | 108 +++++++++++++++++++++++++ synapse/storage/appservice.py | 13 +++ synapse/types.py | 63 ++++++++++++--- 8 files changed, 303 insertions(+), 12 deletions(-) create mode 100644 synapse/replication/http/__init__.py create mode 100644 synapse/replication/http/send_event.py diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index cb82a415a6..e375f2bbcf 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -38,6 +38,7 @@ from synapse.metrics import register_memory_metrics from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.python_dependencies import CONDITIONAL_REQUIREMENTS, \ check_requirements +from synapse.replication.http import ReplicationRestResource, REPLICATION_PREFIX from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource from synapse.rest.key.v1.server_key_resource import LocalKey @@ -219,6 +220,9 @@ class SynapseHomeServer(HomeServer): if name == "metrics" and self.get_config().enable_metrics: resources[METRICS_PREFIX] = MetricsResource(self) + if name == "replication": + resources[REPLICATION_PREFIX] = ReplicationRestResource(self) + return resources def start_listening(self): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 4b6884918d..80baf0ce0e 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -33,8 +33,16 @@ class WorkerConfig(Config): self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") self.worker_log_config = config.get("worker_log_config") + + # The host used to connect to the main synapse self.worker_replication_host = config.get("worker_replication_host", None) + + # The port on the main synapse for TCP replication self.worker_replication_port = config.get("worker_replication_port", None) + + # The port on the main synapse for HTTP replication endpoint + self.worker_replication_http_port = config.get("worker_replication_http_port") + self.worker_name = config.get("worker_name", self.worker_app) self.worker_main_http_uri = config.get("worker_main_http_uri", None) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 87e3fe7b97..7b80444f73 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -14,6 +14,9 @@ # limitations under the License. 
+from frozendict import frozendict + + class EventContext(object): """ Attributes: @@ -73,3 +76,72 @@ class EventContext(object): self.prev_state_events = None self.app_service = None + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "current_state_ids": _encode_state_dict(self.current_state_ids), + "prev_state_ids": _encode_state_dict(self.prev_state_ids), + "state_group": self.state_group, + "rejected": self.rejected, + "push_actions": self.push_actions, + "prev_group": self.prev_group, + "delta_ids": _encode_state_dict(self.delta_ids), + "prev_state_events": self.prev_state_events, + "app_service_id": self.app_service.id if self.app_service else None + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + EventContext. + + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + EventContext + """ + context = EventContext() + context.current_state_ids = _decode_state_dict(input["current_state_ids"]) + context.prev_state_ids = _decode_state_dict(input["prev_state_ids"]) + context.state_group = input["state_group"] + context.rejected = input["rejected"] + context.push_actions = input["push_actions"] + context.prev_group = input["prev_group"] + context.delta_ids = _decode_state_dict(input["delta_ids"]) + context.prev_state_events = input["prev_state_events"] + + app_service_id = input["app_service_id"] + if app_service_id: + context.app_service = store.get_app_service_by_id(app_service_id) + + return context + + +def _encode_state_dict(state_dict): + """Since dicts of (type, state_key) -> event_id cannot be serialized in + JSON we need to convert them to a form that can. + """ + if state_dict is None: + return None + + return [ + (etype, state_key, v) + for (etype, state_key), v in state_dict.iteritems() + ] + + +def _decode_state_dict(input): + """Decodes a state dict encoded using `_encode_state_dict` above + """ + if input is None: + return None + + return frozendict({(etype, state_key,): v for etype, state_key, v in input}) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a58fc37fff..92c153f300 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -28,6 +28,7 @@ from synapse.util.logcontext import preserve_fn from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze from synapse.visibility import filter_events_for_client +from synapse.replication.http.send_event import send_event_to_master from ._base import BaseHandler @@ -312,6 +313,9 @@ class EventCreationHandler(object): self.server_name = hs.hostname self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() + self.config = hs.config + + self.http_client = hs.get_simple_http_client() # This is only used to get at ratelimit function, and maybe_kick_guest_users self.base_handler = BaseHandler(hs) @@ -559,6 +563,18 @@ class EventCreationHandler(object): ): # We now need to go and hit out to wherever we need to hit out to. + # If we're a worker we need to hit out to the master. 
+ if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + if ratelimit: yield self.base_handler.ratelimit(requester) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py new file mode 100644 index 0000000000..b378b41646 --- /dev/null +++ b/synapse/replication/http/__init__.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import send_event + +from synapse.http.server import JsonResource + + +REPLICATION_PREFIX = "/_synapse/replication" + + +class ReplicationRestResource(JsonResource): + def __init__(self, hs): + JsonResource.__init__(self, hs, canonical_json=False) + self.register_servlets(hs) + + def register_servlets(self, hs): + send_event.register_servlets(hs, self) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py new file mode 100644 index 0000000000..ff9b9d2f10 --- /dev/null +++ b/synapse/replication/http/send_event.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.events import FrozenEvent +from synapse.events.snapshot import EventContext +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.metrics import Measure +from synapse.types import Requester + +import logging +import re + +logger = logging.getLogger(__name__) + + +def send_event_to_master(client, host, port, requester, event, context): + """Send event to be handled on the master + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + event (FrozenEvent) + context (EventContext) + """ + uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + + payload = { + "event": event.get_pdu_json(), + "internal_metadata": event.internal_metadata.get_dict(), + "rejected_reason": event.rejected_reason, + "context": context.serialize(), + "requester": requester.serialize(), + } + + return client.post_json_get_json(uri, payload) + + +class ReplicationSendEventRestServlet(RestServlet): + """Handles events newly created on workers, including persisting and + notifying. 
+ + The API looks like: + + POST /_synapse/replication/send_event + + { + "event": { .. serialized event .. }, + "internal_metadata": { .. serialized internal_metadata .. }, + "rejected_reason": .., // The event.rejected_reason field + "context": { .. serialized event context .. }, + "requester": { .. serialized requester .. }, + } + """ + PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + + def __init__(self, hs): + super(ReplicationSendEventRestServlet, self).__init__() + + self.event_creation_handler = hs.get_event_creation_handler() + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + with Measure(self.clock, "repl_send_event_parse"): + content = parse_json_object_from_request(request) + + event_dict = content["event"] + internal_metadata = content["internal_metadata"] + rejected_reason = content["rejected_reason"] + event = FrozenEvent(event_dict, internal_metadata, rejected_reason) + + requester = Requester.deserialize(self.store, content["requester"]) + context = EventContext.deserialize(self.store, content["context"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "Got event to send with ID: %s into room: %s", + event.event_id, event.room_id, + ) + + yield self.event_creation_handler.handle_new_client_event( + requester, event, context, + ) + + defer.returnValue((200, {})) + + +def register_servlets(hs, http_server): + ReplicationSendEventRestServlet(hs).register(http_server) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index d8c84b7141..79673b4273 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -99,6 +99,19 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + def get_app_service_by_id(self, as_id): + """Get the application service with the given appservice ID. + + Args: + as_id (str): The application service ID. + Returns: + synapse.appservice.ApplicationService or None. + """ + for service in self.services_cache: + if service.id == as_id: + return service + return None + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. 
diff --git a/synapse/types.py b/synapse/types.py index 6e76c016d9..7cb24cecb2 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -19,20 +19,59 @@ from synapse.api.errors import SynapseError from collections import namedtuple -Requester = namedtuple("Requester", [ +class Requester(namedtuple("Requester", [ "user", "access_token_id", "is_guest", "device_id", "app_service", -]) -""" -Represents the user making a request +])): + """ + Represents the user making a request -Attributes: - user (UserID): id of the user making the request - access_token_id (int|None): *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest (bool): True if the user making this request is a guest user - device_id (str|None): device_id which was set at authentication time - app_service (ApplicationService|None): the AS requesting on behalf of the user -""" + Attributes: + user (UserID): id of the user making the request + access_token_id (int|None): *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest (bool): True if the user making this request is a guest user + device_id (str|None): device_id which was set at authentication time + app_service (ApplicationService|None): the AS requesting on behalf of the user + """ + + def serialize(self): + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "user_id": self.user.to_string(), + "access_token_id": self.access_token_id, + "is_guest": self.is_guest, + "device_id": self.device_id, + "app_server_id": self.app_service.id if self.app_service else None, + } + + @staticmethod + def deserialize(store, input): + """Converts a dict that was produced by `serialize` back into a + Requester. + + Args: + store (DataStore): Used to convert AS ID to AS object + input (dict): A dict produced by `serialize` + + Returns: + Requester + """ + appservice = None + if input["app_server_id"]: + appservice = store.get_app_service_by_id(input["app_server_id"]) + + return Requester( + user=UserID.from_string(input["user_id"]), + access_token_id=input["access_token_id"], + is_guest=input["is_guest"], + device_id=input["device_id"], + app_service=appservice, + ) def create_requester(user_id, access_token_id=None, is_guest=False, From 8ec2e638be6f9205451d51dc839c94d0dd8999d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 10:55:40 +0000 Subject: [PATCH 177/348] Add event_creator worker --- synapse/app/event_creator.py | 170 ++++++++++++++++++++ synapse/replication/slave/storage/events.py | 20 +++ synapse/rest/client/v1/room.py | 1 - 3 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 synapse/app/event_creator.py diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py new file mode 100644 index 0000000000..b2ce399258 --- /dev/null +++ b/synapse/app/event_creator.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import sys + +import synapse +from synapse import events +from synapse.app import _base +from synapse.config._base import ConfigError +from synapse.config.homeserver import HomeServerConfig +from synapse.config.logger import setup_logging +from synapse.crypto import context_factory +from synapse.http.server import JsonResource +from synapse.http.site import SynapseSite +from synapse.metrics.resource import METRICS_PREFIX, MetricsResource +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore +from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.registration import SlavedRegistrationStore +from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.client.v1.room import RoomSendEventRestServlet +from synapse.server import HomeServer +from synapse.storage.engines import create_engine +from synapse.util.httpresourcetree import create_resource_tree +from synapse.util.logcontext import LoggingContext +from synapse.util.manhole import manhole +from synapse.util.versionstring import get_version_string +from twisted.internet import reactor +from twisted.web.resource import Resource + +logger = logging.getLogger("synapse.app.event_creator") + + +class EventCreatorSlavedStore( + SlavedDeviceStore, + SlavedClientIpStore, + SlavedApplicationServiceStore, + SlavedEventStore, + SlavedRegistrationStore, + RoomStore, + BaseSlavedStore, +): + pass + + +class EventCreatorServer(HomeServer): + def setup(self): + logger.info("Setting up.") + self.datastore = EventCreatorSlavedStore(self.get_db_conn(), self) + logger.info("Finished setting up.") + + def _listen_http(self, listener_config): + port = listener_config["port"] + bind_addresses = listener_config["bind_addresses"] + site_tag = listener_config.get("tag", port) + resources = {} + for res in listener_config["resources"]: + for name in res["names"]: + if name == "metrics": + resources[METRICS_PREFIX] = MetricsResource(self) + elif name == "client": + resource = JsonResource(self, canonical_json=False) + RoomSendEventRestServlet(self).register(resource) + resources.update({ + "/_matrix/client/r0": resource, + "/_matrix/client/unstable": resource, + "/_matrix/client/v2_alpha": resource, + "/_matrix/client/api/v1": resource, + }) + + root_resource = create_resource_tree(resources, Resource()) + + _base.listen_tcp( + bind_addresses, + port, + SynapseSite( + "synapse.access.http.%s" % (site_tag,), + site_tag, + listener_config, + root_resource, + ) + ) + + logger.info("Synapse event creator now listening on port %d", port) + + def start_listening(self, listeners): + for listener in listeners: + if listener["type"] == "http": + self._listen_http(listener) + elif listener["type"] == "manhole": + _base.listen_tcp( + listener["bind_addresses"], + listener["port"], + manhole( + username="matrix", + password="rabbithole", + globals={"hs": self}, + ) + ) + else: + logger.warn("Unrecognized listener type: %s", listener["type"]) + + self.get_tcp_replication().start_replication(self) + + def build_tcp_replication(self): + return 
ReplicationClientHandler(self.get_datastore()) + + +def start(config_options): + try: + config = HomeServerConfig.load_config( + "Synapse event creator", config_options + ) + except ConfigError as e: + sys.stderr.write("\n" + e.message + "\n") + sys.exit(1) + + assert config.worker_app == "synapse.app.event_creator" + + assert config.worker_replication_http_port is not None + + setup_logging(config, use_worker_options=True) + + events.USE_FROZEN_DICTS = config.use_frozen_dicts + + database_engine = create_engine(config.database_config) + + tls_server_context_factory = context_factory.ServerContextFactory(config) + + ss = EventCreatorServer( + config.server_name, + db_config=config.database_config, + tls_server_context_factory=tls_server_context_factory, + config=config, + version_string="Synapse/" + get_version_string(synapse), + database_engine=database_engine, + ) + + ss.setup() + ss.get_handlers() + ss.start_listening(config.worker_listeners) + + def start(): + ss.get_state_handler().start_caching() + ss.get_datastore().start_profiling() + + reactor.callWhenRunning(start) + + _base.start_worker_reactor("synapse-event-creator", config) + + +if __name__ == '__main__': + with LoggingContext("main"): + start(sys.argv[1:]) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 8acb5df0f3..f8c164b48b 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -21,6 +21,7 @@ from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore +from synapse.storage.signatures import SignatureStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -170,6 +171,25 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + get_latest_event_ids_and_hashes_in_room = ( + DataStore.get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_latest_event_ids_and_hashes_in_room = ( + DataStore._get_latest_event_ids_and_hashes_in_room.__func__ + ) + _get_event_reference_hashes_txn = ( + DataStore._get_event_reference_hashes_txn.__func__ + ) + add_event_hashes = ( + DataStore.add_event_hashes.__func__ + ) + get_event_reference_hashes = ( + SignatureStore.__dict__["get_event_reference_hashes"] + ) + get_event_reference_hash = ( + SignatureStore.__dict__["get_event_reference_hash"] + ) + def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = self._stream_id_gen.get_current_token() diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index fbb2fc36e4..817fd47842 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -186,7 +186,6 @@ class RoomSendEventRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomSendEventRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() def register(self, http_server): From 50fe92cd26d06cd09a722521488bea2a9d1ffdea Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 17:27:00 +0000 Subject: [PATCH 178/348] Move presence handling into handle_new_client_event As we want to 
have it run on the main synapse instance --- synapse/handlers/message.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 92c153f300..6be3f4d770 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -423,12 +423,6 @@ class EventCreationHandler(object): ratelimit=ratelimit, ) - if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(user) - @defer.inlineCallbacks def deduplicate_state_event(self, event, context): """ @@ -708,3 +702,9 @@ class EventCreationHandler(object): ) preserve_fn(_notify)() + + if event.type == EventTypes.Message: + presence = self.hs.get_presence_handler() + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + preserve_fn(presence.bump_presence_active_time)(requester.user) From f133228cb35b7803910688e7060772cb9e64f01a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 6 Feb 2018 17:23:13 +0000 Subject: [PATCH 179/348] Add note in docs/workers.rst --- docs/workers.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/workers.rst b/docs/workers.rst index b39f79058e..213d57e47c 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -207,3 +207,14 @@ the ``worker_main_http_uri`` setting in the frontend_proxy worker configuration file. For example:: worker_main_http_uri: http://127.0.0.1:8008 + + +``synapse.app.event_creator`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Handles non-state event creation. It can handle REST endpoints matching: + + ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send + +It will create events locally and then send them on to the main synapse +instance to be persisted and handled. From 5fa571a91b851d372e07217c091b7e3a9ef3d116 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 7 Feb 2018 13:35:08 +0000 Subject: [PATCH 180/348] Tell storage providers about new file so they can upload --- synapse/rest/media/v1/media_storage.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index e8e8b3986d..3f8d4b9c22 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -70,6 +70,12 @@ class MediaStorage(object): _write_file_synchronously, source, fname, )) + # Tell the storage providers about the new file. They'll decide + # if they should upload it and whether to do so synchronously + # or not. 
+ for provider in self.storage_providers: + yield provider.store_file(path, file_info) + defer.returnValue(fname) @contextlib.contextmanager From 671540dccf3996620ffe65705904fb911e21fb68 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:27:08 +0000 Subject: [PATCH 181/348] rename delete_old_state -> purge_history (beacause it deletes more than state) --- synapse/handlers/message.py | 2 +- synapse/storage/events.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21f1717dd2..1c7860bb05 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.delete_old_state(room_id, depth) + yield self.store.purge_history(room_id, depth) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 7a9cd3ec90..21533970d1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,16 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def delete_old_state(self, room_id, topological_ordering): - return self.runInteraction( - "delete_old_state", - self._delete_old_state_txn, room_id, topological_ordering - ) - - def _delete_old_state_txn(self, txn, room_id, topological_ordering): - """Deletes old room state + def purge_history(self, room_id, topological_ordering): + """Deletes room history before a certain point """ + return self.runInteraction( + "purge_history", + self._purge_history_txn, room_id, topological_ordering + ) + + def _purge_history_txn(self, txn, room_id, topological_ordering): # Tables that should be pruned: # event_auth # event_backward_extremities From 61ffaa8137ac962f84a077bb53c4a1b06b21b49b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:34:35 +0000 Subject: [PATCH 182/348] bump purge logging to info this thing takes ages and the only sign of any progress is the logs, so having some logs is useful. 
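As an aside on the storage-provider hook added in patch 180 above: the only
contract visible there is that each configured provider exposes a
store_file(path, file_info) method returning a Deferred. A purely illustrative
provider (the class name and the backup-directory behaviour are made up for
this sketch, not taken from the patches) could look like::

    import os
    import shutil

    from twisted.internet import threads

    class CopyToBackupDirProvider(object):
        """Copies every newly stored file into a secondary directory."""

        def __init__(self, local_media_directory, backup_directory):
            self.local_media_directory = local_media_directory
            self.backup_directory = backup_directory

        def store_file(self, path, file_info):
            # path is relative to the primary media store
            source = os.path.join(self.local_media_directory, path)
            dest = os.path.join(self.backup_directory, path)

            def _copy():
                dirname = os.path.dirname(dest)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                shutil.copyfile(source, dest)

            # run the copy off the reactor thread and hand back a Deferred
            return threads.deferToThread(_copy)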
--- synapse/storage/events.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 21533970d1..803a4e2477 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2081,7 +2081,7 @@ class EventsStore(SQLBaseStore): 400, "topological_ordering is greater than forward extremeties" ) - logger.debug("[purge] looking for events to delete") + logger.info("[purge] looking for events to delete") txn.execute( "SELECT event_id, state_key FROM events" @@ -2102,7 +2102,7 @@ class EventsStore(SQLBaseStore): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.debug("[purge] Finding new backward extremities") + logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged @@ -2116,7 +2116,7 @@ class EventsStore(SQLBaseStore): ) new_backwards_extrems = txn.fetchall() - logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + logger.info("[purge] replacing backward extremities: %r", new_backwards_extrems) txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", @@ -2132,7 +2132,7 @@ class EventsStore(SQLBaseStore): ] ) - logger.debug("[purge] finding redundant state groups") + logger.info("[purge] finding redundant state groups") # Get all state groups that are only referenced by events that are # to be deleted. @@ -2149,15 +2149,15 @@ class EventsStore(SQLBaseStore): ) state_rows = txn.fetchall() - logger.debug("[purge] found %i redundant state groups", len(state_rows)) + logger.info("[purge] found %i redundant state groups", len(state_rows)) # make a set of the redundant state groups, so that we can look them up # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups - logger.debug("[purge] finding state groups which depend on redundant" - " state groups") + logger.info("[purge] finding state groups which depend on redundant" + " state groups") remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] @@ -2182,7 +2182,7 @@ class EventsStore(SQLBaseStore): # Now we turn the state groups that reference to-be-deleted state # groups to non delta versions. for sg in remaining_state_groups: - logger.debug("[purge] de-delta-ing remaining state group %s", sg) + logger.info("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( txn, [sg], types=None ) @@ -2219,7 +2219,7 @@ class EventsStore(SQLBaseStore): ], ) - logger.debug("[purge] removing redundant state groups") + logger.info("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2230,13 +2230,13 @@ class EventsStore(SQLBaseStore): ) # Delete all non-state - logger.debug("[purge] removing events from event_to_state_groups") + logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) - logger.debug("[purge] updating room_depth") + logger.info("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? 
WHERE room_id = ?", (topological_ordering, room_id,) @@ -2258,7 +2258,8 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.debug("[purge] removing remote non-state events from %s", table) + logger.info("[purge] removing remote non-state events from %s", + table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), @@ -2266,7 +2267,7 @@ class EventsStore(SQLBaseStore): ) # Mark all state and own events as outliers - logger.debug("[purge] marking remaining events as outliers") + logger.info("[purge] marking remaining events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", From e571aef06d3b1af3946e790841f4b8a3a4cfdebf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Feb 2018 17:40:29 +0000 Subject: [PATCH 183/348] purge: Move cache invalidation to more appropriate place it was a bit of a non-sequitur there --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 803a4e2477..24d9978304 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2099,9 +2099,6 @@ class EventsStore(SQLBaseStore): "[purge] found %i events before cutoff, of which %i are remote" " non-state events to delete", len(event_rows), len(to_delete)) - for event_id, state_key in event_rows: - txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) - logger.info("[purge] Finding new backward extremities") # We calculate the new entries for the backward extremeties by finding @@ -2229,12 +2226,15 @@ class EventsStore(SQLBaseStore): state_rows ) - # Delete all non-state logger.info("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + for event_id, _ in event_rows: + txn.call_after(self._get_state_group_for_event.invalidate, ( + event_id, + )) logger.info("[purge] updating room_depth") txn.execute( From 74fcbf741b3a7b95b5cc44478050e8a40fb7dc46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Feb 2018 18:44:52 +0000 Subject: [PATCH 184/348] delete_local_events for purge_history Add a flag which makes the purger delete local events --- docs/admin_api/purge_history_api.rst | 14 +++++++++-- synapse/handlers/message.py | 4 ++-- synapse/http/servlet.py | 18 +++++++++++--- synapse/rest/client/v1/admin.py | 11 ++++++++- synapse/storage/events.py | 35 ++++++++++++++++++++++------ 5 files changed, 67 insertions(+), 15 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index 08b3306366..b4e5bd9d75 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -4,8 +4,6 @@ Purge History API The purge history API allows server admins to purge historic events from their database, reclaiming disk space. -**NB!** This will not delete local events (locally sent messages content etc) from the database, but will remove lots of the metadata about them and does dramatically reduce the on disk space usage - Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. @@ -15,3 +13,15 @@ The API is simply: ``POST /_matrix/client/r0/admin/purge_history//`` including an ``access_token`` of a server admin. 
+ +By default, events sent by local users are not deleted, as they may represent +the only copies of this content in existence. (Events sent by remote users are +deleted, and room state data before the cutoff is always removed). + +To delete local events as well, set ``delete_local_events`` in the body: + +.. code:: json + + { + "delete_local_events": True, + } diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c7860bb05..276d1a7722 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -63,7 +63,7 @@ class MessageHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() @defer.inlineCallbacks - def purge_history(self, room_id, event_id): + def purge_history(self, room_id, event_id, delete_local_events=False): event = yield self.store.get_event(event_id) if event.room_id != room_id: @@ -72,7 +72,7 @@ class MessageHandler(BaseHandler): depth = event.depth with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth) + yield self.store.purge_history(room_id, depth, delete_local_events) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 71420e54db..ef8e62901b 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -165,6 +167,9 @@ def parse_json_value_from_request(request): except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) except Exception as e: @@ -174,17 +179,24 @@ def parse_json_value_from_request(request): return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." 
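Putting the documentation and servlet changes together, a server admin could
drive the new option like this (an illustrative sketch only: the homeserver
URL, the room and event identifiers, the access token and the use of the
requests library are all placeholders, not taken from the patch)::

    import requests

    resp = requests.post(
        # room and event IDs shown unescaped for readability
        "https://hs.example.com/_matrix/client/r0/admin/purge_history"
        "/!someroom:example.com/$someevent:example.com",
        params={"access_token": "ADMIN_TOKEN"},
        json={"delete_local_events": True},
    )
    resp.raise_for_status()

An empty request body is also accepted (thanks to allow_empty_body=True in the
admin servlet below), in which case local events are kept and only remote
events plus pre-cutoff state are purged.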
diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 5022808ea9..f954d2ea65 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -128,7 +128,16 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self.handlers.message_handler.purge_history(room_id, event_id) + body = parse_json_object_from_request(request, allow_empty_body=True) + + delete_local_events = bool( + body.get("delete_local_history", False) + ) + + yield self.handlers.message_handler.purge_history( + room_id, event_id, + delete_local_events=delete_local_events, + ) defer.returnValue((200, {})) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 24d9978304..11a2ff2d8a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2031,16 +2031,32 @@ class EventsStore(SQLBaseStore): ) return self.runInteraction("get_all_new_events", get_all_new_events_txn) - def purge_history(self, room_id, topological_ordering): + def purge_history( + self, room_id, topological_ordering, delete_local_events, + ): """Deletes room history before a certain point + + Args: + room_id (str): + + topological_ordering (int): + minimum topo ordering to preserve + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). """ return self.runInteraction( "purge_history", - self._purge_history_txn, room_id, topological_ordering + self._purge_history_txn, room_id, topological_ordering, + delete_local_events, ) - def _purge_history_txn(self, txn, room_id, topological_ordering): + def _purge_history_txn( + self, txn, room_id, topological_ordering, delete_local_events, + ): # Tables that should be pruned: # event_auth # event_backward_extremities @@ -2093,11 +2109,14 @@ class EventsStore(SQLBaseStore): to_delete = [ (event_id,) for event_id, state_key in event_rows - if state_key is None and not self.hs.is_mine_id(event_id) + if state_key is None and ( + delete_local_events or not self.hs.is_mine_id(event_id) + ) ] logger.info( - "[purge] found %i events before cutoff, of which %i are remote" - " non-state events to delete", len(event_rows), len(to_delete)) + "[purge] found %i events before cutoff, of which %i can be deleted", + len(event_rows), len(to_delete), + ) logger.info("[purge] Finding new backward extremities") @@ -2273,7 +2292,9 @@ class EventsStore(SQLBaseStore): " WHERE event_id = ?", [ (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or self.hs.is_mine_id(event_id) + if state_key is not None or ( + not delete_local_events and self.hs.is_mine_id(event_id) + ) ] ) From 39a6b3549638c70e3aaf51b361576fbd729eb655 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Feb 2018 12:13:34 +0000 Subject: [PATCH 185/348] purge: move room_depth update to end ... to avoid locking the table for too long --- synapse/storage/events.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 11a2ff2d8a..238a2006b8 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2255,12 +2255,6 @@ class EventsStore(SQLBaseStore): event_id, )) - logger.info("[purge] updating room_depth") - txn.execute( - "UPDATE room_depth SET min_depth = ? 
WHERE room_id = ?", - (topological_ordering, room_id,) - ) - # Delete all remote non-state events for table in ( "events", @@ -2298,6 +2292,18 @@ class EventsStore(SQLBaseStore): ] ) + # synapse tries to take out an exclusive lock on room_depth whenever it + # persists events (because upsert), and once we run this update, we + # will block that for the rest of our transaction. + # + # So, let's stick it at the end so that we don't block event + # persistence. + logger.info("[purge] updating room_depth") + txn.execute( + "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", + (topological_ordering, room_id,) + ) + logger.info("[purge] done") @defer.inlineCallbacks From 32c7b8e48b5c79de5b722afb4c2b79c6c712cdc5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Feb 2018 17:18:07 +0000 Subject: [PATCH 186/348] Update workers docs to include http port --- docs/workers.rst | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/docs/workers.rst b/docs/workers.rst index 213d57e47c..b687807e59 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -30,17 +30,29 @@ requests made to the federation port. The caveats regarding running a reverse-proxy on the federation port still apply (see https://github.com/matrix-org/synapse/blob/master/README.rst#reverse-proxying-the-federation-port). -To enable workers, you need to add a replication listener to the master synapse, e.g.:: +To enable workers, you need to add two replication listeners to the master +synapse, e.g.:: listeners: + # The TCP replication port - port: 9092 bind_address: '127.0.0.1' type: replication + # The HTTP replication port + - port: 9093 + bind_address: '127.0.0.1' + type: http + resources: + - names: [replication] -Under **no circumstances** should this replication API listener be exposed to the -public internet; it currently implements no authentication whatsoever and is +Under **no circumstances** should these replication API listeners be exposed to +the public internet; it currently implements no authentication whatsoever and is unencrypted. +(Roughly, the TCP port is used for streaming data from the master to the +workers, and the HTTP port for the workers to communicate with the main +synapse process.) + You then create a set of configs for the various worker processes. These should be worker configuration files, and should be stored in a dedicated subdirectory, to allow synctl to manipulate them. @@ -52,8 +64,10 @@ You should minimise the number of overrides though to maintain a usable config. You must specify the type of worker application (``worker_app``). The currently available worker applications are listed below. You must also specify the -replication endpoint that it's talking to on the main synapse process -(``worker_replication_host`` and ``worker_replication_port``). +replication endpoints that it's talking to on the main synapse process. +``worker_replication_host`` should specify the host of the main synapse, +``worker_replication_port`` should point to the TCP replication listener port and +``worker_replication_http_port`` should point to the HTTP replication port. For instance:: @@ -62,6 +76,7 @@ For instance:: # The replication listener on the synapse to talk to. 
worker_replication_host: 127.0.0.1 worker_replication_port: 9092 + worker_replication_http_port: 9093 worker_listeners: - type: http From 8fd1a324564510be55a7c1e6b6339f736f5c525a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 13:04:41 +0000 Subject: [PATCH 187/348] Fix typos in purge api & doc * It's supposed to be purge_local_events, not ..._history * Fix the doc to have valid json --- docs/admin_api/purge_history_api.rst | 2 +- synapse/rest/client/v1/admin.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index b4e5bd9d75..a3a17e9f9f 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -23,5 +23,5 @@ To delete local events as well, set ``delete_local_events`` in the body: .. code:: json { - "delete_local_events": True, + "delete_local_events": true } diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2ad486c67d..6073cc6fa2 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -131,9 +131,7 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): body = parse_json_object_from_request(request, allow_empty_body=True) - delete_local_events = bool( - body.get("delete_local_history", False) - ) + delete_local_events = bool(body.get("delete_local_events", False)) yield self.handlers.message_handler.purge_history( room_id, event_id, From 630caf8a703250e0f568000958faee42f9336b72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 14:29:22 +0000 Subject: [PATCH 188/348] style nit --- synapse/storage/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ca1d4a3986..3d5eb9bc02 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -528,7 +528,7 @@ class EventsStore(SQLBaseStore): # I don't think this can happen, but let's double-check raise Exception( "Context for new extremity event %s has no state " - "group" % event_id, + "group" % (event_id, ), ) # If we've already seen the state group don't bother adding From 4a6d5517049c5b8b9e43df43a10a0dda5db07244 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 2 Feb 2018 15:25:27 +0000 Subject: [PATCH 189/348] GIN reindex: Fix syntax errors, improve exception handling --- synapse/storage/search.py | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 13c827cf87..076ecff297 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -157,28 +157,42 @@ class SearchStore(BackgroundUpdateStore): @defer.inlineCallbacks def _background_reindex_gin_search(self, progress, batch_size): - '''This handles old synapses which used GIST indexes, if any; + """This handles old synapses which used GIST indexes, if any; converting them back to be GIN as per the actual schema. - ''' + """ def create_index(conn): + conn.rollback() + + # we have to set autocommit, because postgres refuses to + # CREATE INDEX CONCURRENTLY without it. + conn.set_session(autocommit=True) + try: - conn.rollback() - conn.set_session(autocommit=True) c = conn.cursor() + # if we skipped the conversion to GIST, we may already/still + # have an event_search_fts_idx; unfortunately postgres 9.4 + # doesn't support CREATE INDEX IF EXISTS so we just catch the + # exception and ignore it. 
+ import psycopg2 + try: + c.execute( + "CREATE INDEX CONCURRENTLY event_search_fts_idx" + " ON event_search USING GIN (vector)" + ) + except psycopg2.ProgrammingError as e: + logger.warn( + "Ignoring error %r when trying to switch from GIST to GIN", + e + ) + + # we should now be able to delete the GIST index. c.execute( - "CREATE INDEX CONCURRENTLY event_search_fts_idx" - " ON event_search USING GIN (vector)" + "DROP INDEX IF EXISTS event_search_fts_idx_gist" ) - - c.execute("DROP INDEX event_search_fts_idx_gist") - + finally: conn.set_session(autocommit=False) - except e: - logger.warn( - "Ignoring error %s when trying to switch from GIST to GIN" % (e,) - ) if isinstance(self.database_engine, PostgresEngine): yield self.runWithConnection(create_index) From 0b27ae8dc3957e77561b2ff35a5a127532f6f9f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Sat, 3 Feb 2018 23:12:27 +0000 Subject: [PATCH 190/348] move search reindex to schema 47 We're up to schema v47 on develop now, so this will have to go in there to have an effect. This might cause an error if somebody has already run it in the v46 guise, and runs it again in the v47 guise, because it will cause a duplicate entry in the bbackground_updates table. On the other hand, the entry is removed once it is complete, and it is unlikely that anyone other than matrix.org has run it on v46. The update itself is harmless to re-run because it deliberately copes with the index already existing. --- synapse/storage/schema/delta/38/postgres_fts_gist.sql | 2 +- synapse/storage/schema/delta/{46 => 47}/postgres_fts_gin.sql | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename synapse/storage/schema/delta/{46 => 47}/postgres_fts_gin.sql (100%) diff --git a/synapse/storage/schema/delta/38/postgres_fts_gist.sql b/synapse/storage/schema/delta/38/postgres_fts_gist.sql index 5fe27d6877..515e6b8e84 100644 --- a/synapse/storage/schema/delta/38/postgres_fts_gist.sql +++ b/synapse/storage/schema/delta/38/postgres_fts_gist.sql @@ -13,7 +13,7 @@ * limitations under the License. */ --- We no longer do this given we back it out again in schema 46 +-- We no longer do this given we back it out again in schema 47 -- INSERT into background_updates (update_name, progress_json) -- VALUES ('event_search_postgres_gist', '{}'); diff --git a/synapse/storage/schema/delta/46/postgres_fts_gin.sql b/synapse/storage/schema/delta/47/postgres_fts_gin.sql similarity index 100% rename from synapse/storage/schema/delta/46/postgres_fts_gin.sql rename to synapse/storage/schema/delta/47/postgres_fts_gin.sql From d627174da222d1e90334f580003d6c8075284124 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 13 Feb 2018 16:51:21 +0000 Subject: [PATCH 191/348] Fix log message in purge_history (we don't just remove remote events) --- synapse/storage/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index d5bc423f18..bbb6aa992c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2287,8 +2287,7 @@ class EventsStore(SQLBaseStore): "event_signatures", "rejections", ): - logger.info("[purge] removing remote non-state events from %s", - table) + logger.info("[purge] removing events from %s", table) txn.executemany( "DELETE FROM %s WHERE event_id = ?" 
% (table,), From 059d3a6c8e55ab7e5318793b8d7c4546bb850d33 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Feb 2018 17:53:56 +0000 Subject: [PATCH 192/348] Update docs --- docs/workers.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/workers.rst b/docs/workers.rst index b687807e59..dee04bbf3e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -50,7 +50,7 @@ the public internet; it currently implements no authentication whatsoever and is unencrypted. (Roughly, the TCP port is used for streaming data from the master to the -workers, and the HTTP port for the workers to communicate with the main +workers, and the HTTP port for the workers to send data to the main synapse process.) You then create a set of configs for the various worker processes. These @@ -69,6 +69,9 @@ replication endpoints that it's talking to on the main synapse process. ``worker_replication_port`` should point to the TCP replication listener port and ``worker_replication_http_port`` should point to the HTTP replication port. +Currently, only the ``event_creator`` worker requires specifying +``worker_replication_http_port``. + For instance:: worker_app: synapse.app.synchrotron From 5fcbf1e07c5f7c2ce0ec44c2569116507caa0183 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 11:02:22 +0000 Subject: [PATCH 193/348] Rework event purge to use a temporary table ... which should speed things up by reducing the amount of data being shuffled across the connection --- synapse/storage/events.py | 93 ++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index bbb6aa992c..5a2e6a03d5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2115,23 +2115,44 @@ class EventsStore(SQLBaseStore): logger.info("[purge] looking for events to delete") + # we build a temporary table listing the events so that we don't have + # to keep shovelling the list back and forth across the connection. + txn.execute( - "SELECT event_id, state_key FROM events" - " LEFT JOIN state_events USING (room_id, event_id)" - " WHERE room_id = ? AND topological_ordering < ?", - (room_id, topological_ordering,) + "CREATE TEMPORARY TABLE events_to_purge (" + " event_id TEXT NOT NULL," + " should_delete BOOLEAN NOT NULL" + ")" + ) + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + + should_delete_expr = "state_key IS NULL" + should_delete_params = () + if not delete_local_events: + should_delete_expr += " AND event_id NOT LIKE ?" + should_delete_params += ("%:" + self.hs.hostname, ) + + should_delete_params += (room_id, topological_ordering) + + txn.execute( + "INSERT INTO events_to_purge" + " SELECT event_id, %s" + " FROM events AS e LEFT JOIN state_events USING (event_id)" + " WHERE e.room_id = ? AND topological_ordering < ?" 
% ( + should_delete_expr, + ), + should_delete_params, + ) + txn.execute( + "SELECT event_id, should_delete FROM events_to_purge" ) event_rows = txn.fetchall() - - to_delete = [ - (event_id,) for event_id, state_key in event_rows - if state_key is None and ( - delete_local_events or not self.hs.is_mine_id(event_id) - ) - ] logger.info( "[purge] found %i events before cutoff, of which %i can be deleted", - len(event_rows), len(to_delete), + len(event_rows), sum(1 for e in event_rows if e[1]), ) logger.info("[purge] Finding new backward extremities") @@ -2139,12 +2160,11 @@ class EventsStore(SQLBaseStore): # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged txn.execute( - "SELECT DISTINCT e.event_id FROM events as e" - " INNER JOIN event_edges as ed ON e.event_id = ed.prev_event_id" - " INNER JOIN events as e2 ON e2.event_id = ed.event_id" - " WHERE e.room_id = ? AND e.topological_ordering < ?" - " AND e2.topological_ordering >= ?", - (room_id, topological_ordering, topological_ordering) + "SELECT DISTINCT e.event_id FROM events_to_purge AS e" + " INNER JOIN event_edges AS ed ON e.event_id = ed.prev_event_id" + " INNER JOIN events AS e2 ON e2.event_id = ed.event_id" + " WHERE e2.topological_ordering >= ?", + (topological_ordering, ) ) new_backwards_extrems = txn.fetchall() @@ -2172,12 +2192,11 @@ class EventsStore(SQLBaseStore): "SELECT state_group FROM event_to_state_groups" " INNER JOIN events USING (event_id)" " WHERE state_group IN (" - " SELECT DISTINCT state_group FROM events" + " SELECT DISTINCT state_group FROM events_to_purge" " INNER JOIN event_to_state_groups USING (event_id)" - " WHERE room_id = ? AND topological_ordering < ?" " )" " GROUP BY state_group HAVING MAX(topological_ordering) < ?", - (room_id, topological_ordering, topological_ordering) + (topological_ordering, ) ) state_rows = txn.fetchall() @@ -2262,9 +2281,9 @@ class EventsStore(SQLBaseStore): ) logger.info("[purge] removing events from event_to_state_groups") - txn.executemany( - "DELETE FROM event_to_state_groups WHERE event_id = ?", - [(event_id,) for event_id, _ in event_rows] + txn.execute( + "DELETE FROM event_to_state_groups " + "WHERE event_id IN (SELECT event_id from events_to_purge)" ) for event_id, _ in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, ( @@ -2289,22 +2308,26 @@ class EventsStore(SQLBaseStore): ): logger.info("[purge] removing events from %s", table) - txn.executemany( - "DELETE FROM %s WHERE event_id = ?" % (table,), - to_delete + txn.execute( + "DELETE FROM %s WHERE event_id IN (" + " SELECT event_id FROM events_to_purge WHERE should_delete" + ")" % (table,), ) # Mark all state and own events as outliers logger.info("[purge] marking remaining events as outliers") - txn.executemany( + txn.execute( "UPDATE events SET outlier = ?" 
- " WHERE event_id = ?", - [ - (True, event_id,) for event_id, state_key in event_rows - if state_key is not None or ( - not delete_local_events and self.hs.is_mine_id(event_id) - ) - ] + " WHERE event_id IN (" + " SELECT event_id FROM events_to_purge " + " WHERE NOT should_delete" + ")", + (True,), + ) + + # we're now done with the temporary table + txn.execute( + "DROP TABLE events_to_purge" ) # synapse tries to take out an exclusive lock on room_depth whenever it From 278d21b5e498e301a43a217595ce75ed7729256f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 15:44:51 +0000 Subject: [PATCH 194/348] purge_history: fix index use event_push_actions doesn't have an index on event_id, so we need to specify room_id. --- synapse/storage/events.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 5a2e6a03d5..c81bc75eae 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2314,6 +2314,20 @@ class EventsStore(SQLBaseStore): ")" % (table,), ) + # event_push_actions lacks an index on event_id, and has one on + # (room_id, event_id) instead. + for table in ( + "event_push_actions", + ): + logger.info("[purge] removing events from %s", table) + + txn.execute( + "DELETE FROM %s WHERE room_id = ? AND event_id IN (" + " SELECT event_id FROM events_to_purge WHERE should_delete" + ")" % (table,), + (room_id, ) + ) + # Mark all state and own events as outliers logger.info("[purge] marking remaining events as outliers") txn.execute( From 5978dccff09e647509bb92e8125aa02e87f7a0a2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 15:54:09 +0000 Subject: [PATCH 195/348] remove overzealous exception handling --- synapse/storage/search.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/synapse/storage/search.py b/synapse/storage/search.py index 8d294d497b..2755acff40 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,7 +16,6 @@ from collections import namedtuple import logging import re -import sys import ujson as json from twisted.internet import defer @@ -335,25 +334,18 @@ class SearchStore(BackgroundUpdateStore): # (postgres 9.5 uses the separate gin_pending_list_limit setting, # so doesn't suffer the same problem, but changing work_mem will # be harmless) + # + # Note that we don't need to worry about restoring it on + # exception, because exceptions will cause the transaction to be + # rolled back, including the effects of the SET command. + # + # Also: we use SET rather than SET LOCAL because there's lots of + # other stuff going on in this transaction, which want to have the + # normal work_mem setting. 
txn.execute("SET work_mem='256kB'") - try: - txn.executemany(sql, args) - except Exception: - # we need to reset work_mem, but doing so may throw a new - # exception and we want to preserve the original - t, v, tb = sys.exc_info() - try: - txn.execute("RESET work_mem") - except Exception as e: - logger.warn( - "exception resetting work_mem during exception " - "handling: %r", - e, - ) - raise t, v, tb - else: - txn.execute("RESET work_mem") + txn.executemany(sql, args) + txn.execute("RESET work_mem") elif isinstance(self.database_engine, Sqlite3Engine): sql = ( From ac27f6a35ebb63d502769edec642fbf70a178a60 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 16:41:12 +0000 Subject: [PATCH 196/348] purge_history: handle sqlite asshattery apparently creating a temporary table commits the transaction. because that's a useful thing. --- synapse/storage/events.py | 46 +++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c81bc75eae..90e910f615 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2093,6 +2093,27 @@ class EventsStore(SQLBaseStore): # state_groups # state_groups_state + # we will build a temporary table listing the events so that we don't + # have to keep shovelling the list back and forth across the + # connection. Annoyingly the python sqlite driver commits the + # transaction on CREATE, so let's do this first. + # + # furthermore, we might already have the table from a previous (failed) + # purge attempt, so let's drop the table first. + + txn.execute("DROP TABLE IF EXISTS events_to_purge") + + txn.execute( + "CREATE TEMPORARY TABLE events_to_purge (" + " event_id TEXT NOT NULL," + " should_delete BOOLEAN NOT NULL" + ")" + ) + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + # First ensure that we're not about to delete all the forward extremeties txn.execute( "SELECT e.event_id, e.depth FROM events as e " @@ -2115,20 +2136,6 @@ class EventsStore(SQLBaseStore): logger.info("[purge] looking for events to delete") - # we build a temporary table listing the events so that we don't have - # to keep shovelling the list back and forth across the connection. - - txn.execute( - "CREATE TEMPORARY TABLE events_to_purge (" - " event_id TEXT NOT NULL," - " should_delete BOOLEAN NOT NULL" - ")" - ) - - # create an index on should_delete because later we'll be looking for - # the should_delete / shouldn't_delete subsets - txn.execute("CREATE INDEX ON events_to_purge(should_delete)") - should_delete_expr = "state_key IS NULL" should_delete_params = () if not delete_local_events: @@ -2339,11 +2346,6 @@ class EventsStore(SQLBaseStore): (True,), ) - # we're now done with the temporary table - txn.execute( - "DROP TABLE events_to_purge" - ) - # synapse tries to take out an exclusive lock on room_depth whenever it # persists events (because upsert), and once we run this update, we # will block that for the rest of our transaction. @@ -2356,6 +2358,12 @@ class EventsStore(SQLBaseStore): (topological_ordering, room_id,) ) + # finally, drop the temp table. this will commit the txn in sqlite, + # so make sure to keep this actually last. 
+ txn.execute( + "DROP TABLE events_to_purge" + ) + logger.info("[purge] done") @defer.inlineCallbacks From 39bf47319f002614d8de11948d09db7648b26315 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 14 Feb 2018 16:42:19 +0000 Subject: [PATCH 197/348] purge_history: fix sqlite syntax error apparently sqlite insists on indexes being named --- synapse/storage/events.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 90e910f615..28cce2979c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2112,7 +2112,10 @@ class EventsStore(SQLBaseStore): # create an index on should_delete because later we'll be looking for # the should_delete / shouldn't_delete subsets - txn.execute("CREATE INDEX ON events_to_purge(should_delete)") + txn.execute( + "CREATE INDEX events_to_purge_should_delete" + " ON events_to_purge(should_delete)", + ) # First ensure that we're not about to delete all the forward extremeties txn.execute( From 92c52df702654df094abb69c9aa24f302cdc6130 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Feb 2018 17:55:18 +0000 Subject: [PATCH 198/348] Make store_file use store_into_file --- synapse/rest/media/v1/media_storage.py | 37 ++++++++------------------ 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 3f8d4b9c22..83471b3173 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -58,23 +58,13 @@ class MediaStorage(object): Returns: Deferred[str]: the file path written to in the primary media store """ - path = self._file_info_to_path(file_info) - fname = os.path.join(self.local_media_directory, path) - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - - # Write to the main repository - yield make_deferred_yieldable(threads.deferToThread( - _write_file_synchronously, source, fname, - )) - - # Tell the storage providers about the new file. They'll decide - # if they should upload it and whether to do so synchronously - # or not. - for provider in self.storage_providers: - yield provider.store_file(path, file_info) + with self.store_into_file(file_info) as (f, fname, finish_cb): + # Write to the main repository + yield make_deferred_yieldable(threads.deferToThread( + _write_file_synchronously, source, f, + )) + yield finish_cb() defer.returnValue(fname) @@ -240,21 +230,16 @@ class MediaStorage(object): ) -def _write_file_synchronously(source, fname): - """Write `source` to the path `fname` synchronously. Should be called +def _write_file_synchronously(source, dest): + """Write `source` to the file like `dest` synchronously. Should be called from a thread. 
Args: - source: A file like object to be written - fname (str): Path to write to + source: A file like object that's to be written + dest: A file like object to be written to """ - dirname = os.path.dirname(fname) - if not os.path.exists(dirname): - os.makedirs(dirname) - source.seek(0) # Ensure we read from the start of the file - with open(fname, "wb") as f: - shutil.copyfileobj(source, f) + shutil.copyfileobj(source, dest) class FileResponder(Responder): From ef344b10e529d1c0b023f009a8aae806e8a981bc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 11:02:43 +0000 Subject: [PATCH 199/348] Don't log errors propogated from send_event --- synapse/replication/http/send_event.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index ff9b9d2f10..7b21a2213c 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -15,6 +15,7 @@ from twisted.internet import defer +from synapse.api.errors import SynapseError, MatrixCodeMessageException from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request @@ -27,6 +28,7 @@ import re logger = logging.getLogger(__name__) +@defer.inlineCallbacks def send_event_to_master(client, host, port, requester, event, context): """Send event to be handled on the master @@ -48,7 +50,14 @@ def send_event_to_master(client, host, port, requester, event, context): "requester": requester.serialize(), } - return client.post_json_get_json(uri, payload) + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) class ReplicationSendEventRestServlet(RestServlet): From fd1601c5965a8ba065341257f9d15fe31d49eb48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 10:51:46 +0000 Subject: [PATCH 200/348] Fix state group storage bug in workers We needed to move `_count_state_group_hops_txn` to the StateGroupWorkerStore. --- synapse/storage/state.py | 82 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index adb48df73e..d0a840456a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -655,6 +655,47 @@ class StateGroupWorkerStore(SQLBaseStore): return self.runInteraction("store_state_group", _store_state_group_txn) + def _count_state_group_hops_txn(self, txn, state_group): + """Given a state group, count how many hops there are in the tree. + + This is used to ensure the delta chains don't get too long. + """ + if isinstance(self.database_engine, PostgresEngine): + sql = (""" + WITH RECURSIVE state(state_group) AS ( + VALUES(?::bigint) + UNION ALL + SELECT prev_state_group FROM state_group_edges e, state s + WHERE s.state_group = e.state_group + ) + SELECT count(*) FROM state; + """) + + txn.execute(sql, (state_group,)) + row = txn.fetchone() + if row and row[0]: + return row[0] + else: + return 0 + else: + # We don't use WITH RECURSIVE on sqlite3 as there are distributions + # that ship with an sqlite3 version that doesn't support it (e.g. 
wheezy) + next_group = state_group + count = 0 + + while next_group: + next_group = self._simple_select_one_onecol_txn( + txn, + table="state_group_edges", + keyvalues={"state_group": next_group}, + retcol="prev_state_group", + allow_none=True, + ) + if next_group: + count += 1 + + return count + class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): """ Keeps track of the state at a given event. @@ -729,47 +770,6 @@ class StateStore(StateGroupWorkerStore, BackgroundUpdateStore): (event_id,), state_group_id ) - def _count_state_group_hops_txn(self, txn, state_group): - """Given a state group, count how many hops there are in the tree. - - This is used to ensure the delta chains don't get too long. - """ - if isinstance(self.database_engine, PostgresEngine): - sql = (""" - WITH RECURSIVE state(state_group) AS ( - VALUES(?::bigint) - UNION ALL - SELECT prev_state_group FROM state_group_edges e, state s - WHERE s.state_group = e.state_group - ) - SELECT count(*) FROM state; - """) - - txn.execute(sql, (state_group,)) - row = txn.fetchone() - if row and row[0]: - return row[0] - else: - return 0 - else: - # We don't use WITH RECURSIVE on sqlite3 as there are distributions - # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group - count = 0 - - while next_group: - next_group = self._simple_select_one_onecol_txn( - txn, - table="state_group_edges", - keyvalues={"state_group": next_group}, - retcol="prev_state_group", - allow_none=True, - ) - if next_group: - count += 1 - - return count - @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): """This background update will slowly deduplicate state by reencoding From 106906a65e647d94a9d2faf1b3a626bc1f608a25 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 13:53:18 +0000 Subject: [PATCH 201/348] Don't serialize current state over replication --- synapse/events/snapshot.py | 41 ++++++++++++++++++++++---- synapse/replication/http/send_event.py | 4 +-- synapse/storage/state.py | 14 +++++++++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 7b80444f73..f9445bef13 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from twisted.internet import defer from frozendict import frozendict @@ -77,16 +78,30 @@ class EventContext(object): self.app_service = None - def serialize(self): + def serialize(self, event): """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` + Args: + event (FrozenEvent): The event that this context relates to + Returns: dict """ + + # We don't serialize the full state dicts, instead they get pulled out + # of the DB on the other side. However, the other side can't figure out + # the prev_state_ids, so if we're a state event we include the event + # id that we replaced in the state. 
+ if event.is_state(): + prev_state_id = self.prev_state_ids.get((event.type, event.state_key)) + else: + prev_state_id = None + return { - "current_state_ids": _encode_state_dict(self.current_state_ids), - "prev_state_ids": _encode_state_dict(self.prev_state_ids), + "prev_state_id": prev_state_id, + "event_type": event.type, + "event_state_key": event.state_key if event.is_state() else None, "state_group": self.state_group, "rejected": self.rejected, "push_actions": self.push_actions, @@ -97,6 +112,7 @@ class EventContext(object): } @staticmethod + @defer.inlineCallbacks def deserialize(store, input): """Converts a dict that was produced by `serialize` back into a EventContext. @@ -109,8 +125,6 @@ class EventContext(object): EventContext """ context = EventContext() - context.current_state_ids = _decode_state_dict(input["current_state_ids"]) - context.prev_state_ids = _decode_state_dict(input["prev_state_ids"]) context.state_group = input["state_group"] context.rejected = input["rejected"] context.push_actions = input["push_actions"] @@ -118,11 +132,26 @@ class EventContext(object): context.delta_ids = _decode_state_dict(input["delta_ids"]) context.prev_state_events = input["prev_state_events"] + # We use the state_group and prev_state_id stuff to pull the + # current_state_ids out of the DB and construct prev_state_ids. + prev_state_id = input["prev_state_id"] + event_type = input["event_type"] + event_state_key = input["event_state_key"] + + context.current_state_ids = yield store.get_state_ids_for_group( + context.state_group, + ) + if prev_state_id and event_state_key: + context.prev_state_ids = dict(context.current_state_ids) + context.prev_state_ids[(event_type, event_state_key)] = prev_state_id + else: + context.prev_state_ids = context.current_state_ids + app_service_id = input["app_service_id"] if app_service_id: context.app_service = store.get_app_service_by_id(app_service_id) - return context + defer.returnValue(context) def _encode_state_dict(state_dict): diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 7b21a2213c..468f4b68f4 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -46,7 +46,7 @@ def send_event_to_master(client, host, port, requester, event, context): "event": event.get_pdu_json(), "internal_metadata": event.internal_metadata.get_dict(), "rejected_reason": event.rejected_reason, - "context": context.serialize(), + "context": context.serialize(event), "requester": requester.serialize(), } @@ -96,7 +96,7 @@ class ReplicationSendEventRestServlet(RestServlet): event = FrozenEvent(event_dict, internal_metadata, rejected_reason) requester = Requester.deserialize(self.store, content["requester"]) - context = EventContext.deserialize(self.store, content["context"]) + context = yield EventContext.deserialize(self.store, content["context"]) if requester.user: request.authenticated_entity = requester.user.to_string() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d0a840456a..2b325e1c1f 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -139,6 +139,20 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue(group_to_state) + @defer.inlineCallbacks + def get_state_ids_for_group(self, state_group): + """Get the state IDs for the given state group + + Args: + state_group (int) + + Returns: + Deferred[dict]: Resolves to a map of (type, state_key) -> event_id + """ + group_to_state = yield self._get_state_for_groups((state_group,)) + 
+ defer.returnValue(group_to_state[state_group]) + @defer.inlineCallbacks def get_state_groups(self, room_id, event_ids): """ Get the state groups for the given list of event_ids From acac21248cf1834233831383ee52198ca1bd010c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:01:12 +0000 Subject: [PATCH 202/348] Store push actions in staging area --- synapse/push/bulk_push_rule_evaluator.py | 3 +++ synapse/storage/event_push_actions.py | 27 +++++++++++++++++++ .../schema/delta/47/push_actions_staging.sql | 24 +++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 synapse/storage/schema/delta/47/push_actions_staging.sql diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 425a017bdf..841ccbd1f1 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -191,6 +191,9 @@ class BulkPushRuleEvaluator(object): actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: actions_by_user[uid] = actions + yield self.store.add_push_actions_to_staging( + event.event_id, uid, actions, + ) break defer.returnValue(actions_by_user) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 8efe2fd4bb..80c3cfe95f 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -738,6 +738,33 @@ class EventPushActionsStore(SQLBaseStore): (rotate_to_stream_ordering,) ) + def add_push_actions_to_staging(self, event_id, user_id, actions): + """Add the push actions for the user and event to the push + action staging area. + + Args: + event_id (str) + user_id (str) + actions (list) + + Returns: + Deferred + """ + + is_highlight = _action_has_highlight(actions) + + return self._simple_insert( + table="event_push_actions_staging", + values={ + "event_id": event_id, + "user_id": user_id, + "actions": _serialize_action(actions, is_highlight), + "notif": True, + "highlight": is_highlight, + }, + desc="add_push_actions_to_staging", + ) + def _action_has_highlight(actions): for action in actions: diff --git a/synapse/storage/schema/delta/47/push_actions_staging.sql b/synapse/storage/schema/delta/47/push_actions_staging.sql new file mode 100644 index 0000000000..ec4b1d7d42 --- /dev/null +++ b/synapse/storage/schema/delta/47/push_actions_staging.sql @@ -0,0 +1,24 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CREATE TABLE event_push_actions_staging ( + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + actions TEXT NOT NULL, + notif SMALLINT NOT NULL, + highlight SMALLINT NOT NULL +); + +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id); From c714c6185367e39123530cb7f89584004434c473 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:13:36 +0000 Subject: [PATCH 203/348] Update event_push_actions table from staging table --- synapse/storage/event_push_actions.py | 55 ++++++++++++++++++--------- synapse/storage/events.py | 2 +- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 80c3cfe95f..34ff9be731 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,33 +88,50 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, event, tuples): + def _set_push_actions_for_event_and_users_txn(self, txn, event): """ Args: event: the event set actions for tuples: list of tuples of (user_id, actions) """ - values = [] - for uid, actions in tuples: - is_highlight = 1 if _action_has_highlight(actions) else 0 - values.append({ - 'room_id': event.room_id, - 'event_id': event.event_id, - 'user_id': uid, - 'actions': _serialize_action(actions, is_highlight), - 'stream_ordering': event.internal_metadata.stream_ordering, - 'topological_ordering': event.depth, - 'notif': 1, - 'highlight': is_highlight, - }) + sql = """ + INSERT INTO event_push_actions ( + room_id, event_id, user_id, actions, stream_ordering, + topological_ordering, notif, highlight + ) + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight + FROM event_push_actions_staging + WHERE event_id = ? + """ - for uid, __ in tuples: + txn.execute(sql, ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + )) + + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", + ) + + self._simple_delete_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + ) + + for uid in user_ids: txn.call_after( self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid) + (event.room_id, uid,) ) - self._simple_insert_many_txn(txn, "event_push_actions", values) @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( @@ -751,7 +768,7 @@ class EventPushActionsStore(SQLBaseStore): Deferred """ - is_highlight = _action_has_highlight(actions) + is_highlight = is_highlight = 1 if _action_has_highlight(actions) else 0 return self._simple_insert( table="event_push_actions_staging", @@ -759,7 +776,7 @@ class EventPushActionsStore(SQLBaseStore): "event_id": event_id, "user_id": user_id, "actions": _serialize_action(actions, is_highlight), - "notif": True, + "notif": 1, "highlight": is_highlight, }, desc="add_push_actions_to_staging", diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 28cce2979c..ca64aacb1c 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1170,7 +1170,7 @@ class EventsStore(SQLBaseStore): # Insert all the push actions into the event_push_actions table. 
if context.push_actions: self._set_push_actions_for_event_and_users_txn( - txn, event, context.push_actions + txn, event, ) if event.type == EventTypes.Redaction and event.redacts is not None: From 4810f7effd0fc3fd97f9edaf8ea0af48477adb0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:18:37 +0000 Subject: [PATCH 204/348] Remove context.push_actions --- synapse/events/snapshot.py | 4 ---- synapse/push/action_generator.py | 6 +----- synapse/push/bulk_push_rule_evaluator.py | 9 +++------ synapse/storage/events.py | 7 +++---- tests/replication/slave/storage/test_events.py | 5 ++++- 5 files changed, 11 insertions(+), 20 deletions(-) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index f9445bef13..8e684d91b5 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -52,7 +52,6 @@ class EventContext(object): "prev_state_ids", "state_group", "rejected", - "push_actions", "prev_group", "delta_ids", "prev_state_events", @@ -67,7 +66,6 @@ class EventContext(object): self.state_group = None self.rejected = False - self.push_actions = [] # A previously persisted state group and a delta between that # and this state. @@ -104,7 +102,6 @@ class EventContext(object): "event_state_key": event.state_key if event.is_state() else None, "state_group": self.state_group, "rejected": self.rejected, - "push_actions": self.push_actions, "prev_group": self.prev_group, "delta_ids": _encode_state_dict(self.delta_ids), "prev_state_events": self.prev_state_events, @@ -127,7 +124,6 @@ class EventContext(object): context = EventContext() context.state_group = input["state_group"] context.rejected = input["rejected"] - context.push_actions = input["push_actions"] context.prev_group = input["prev_group"] context.delta_ids = _decode_state_dict(input["delta_ids"]) context.prev_state_events = input["prev_state_events"] diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index fe09d50d55..8f619a7a1b 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -40,10 +40,6 @@ class ActionGenerator(object): @defer.inlineCallbacks def handle_push_actions_for_event(self, event, context): with Measure(self.clock, "action_for_event_by_user"): - actions_by_user = yield self.bulk_evaluator.action_for_event_by_user( + yield self.bulk_evaluator.action_for_event_by_user( event, context ) - - context.push_actions = [ - (uid, actions) for uid, actions in actions_by_user.iteritems() - ] diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 841ccbd1f1..1140788aa7 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -137,14 +137,13 @@ class BulkPushRuleEvaluator(object): @defer.inlineCallbacks def action_for_event_by_user(self, event, context): - """Given an event and context, evaluate the push rules and return - the results + """Given an event and context, evaluate the push rules and insert the + results into the event_push_actions_staging table. 
Returns: - dict of user_id -> action + Deferred """ rules_by_user = yield self._get_rules_for_event(event, context) - actions_by_user = {} room_members = yield self.store.get_joined_users_from_context( event, context @@ -190,12 +189,10 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: - actions_by_user[uid] = actions yield self.store.add_push_actions_to_staging( event.event_id, uid, actions, ) break - defer.returnValue(actions_by_user) def _condition_checker(evaluator, conditions, uid, display_name, cache): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ca64aacb1c..52b7b34749 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1168,10 +1168,9 @@ class EventsStore(SQLBaseStore): for event, context in events_and_contexts: # Insert all the push actions into the event_push_actions table. - if context.push_actions: - self._set_push_actions_for_event_and_users_txn( - txn, event, - ) + self._set_push_actions_for_event_and_users_txn( + txn, event, + ) if event.type == EventTypes.Redaction and event.redacts is not None: # Remove the entries in the event_push_actions table for the diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index f430cce931..4780f2ab72 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -230,7 +230,10 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - context.push_actions = push_actions + for user_id, actions in push_actions: + yield self.master_store.add_push_actions_to_staging( + event.event_id, user_id, actions, + ) ordering = None if backfill: From b96278d6fe499e47133d2d2e82b9d3a0074d7005 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 15:37:40 +0000 Subject: [PATCH 205/348] Ensure that we delete staging push actions on errors --- synapse/handlers/message.py | 12 +++++++++--- synapse/storage/event_push_actions.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1c3ac03f20..d99d8049b3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -683,9 +683,15 @@ class EventCreationHandler(object): event, context ) - (event_stream_id, max_stream_id) = yield self.store.persist_event( - event, context=context - ) + try: + (event_stream_id, max_stream_id) = yield self.store.persist_event( + event, context=context + ) + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area + preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) + raise # this intentionally does not yield: we don't care about the result # and don't need to wait for it. 
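(As an illustrative aside, not part of the patch: the try/except added above makes sure staged push actions are removed if persisting the event fails. A minimal self-contained sketch of that pattern in plain Python follows; persist_with_staging_cleanup and both callables are hypothetical stand-ins, not Synapse APIs.)

def persist_with_staging_cleanup(persist, remove_from_staging, event_id):
    """Persist an event; on failure, best-effort removal of its staged
    push actions before re-raising the original error."""
    try:
        return persist(event_id)
    except Exception:
        # Clean up the staging rows so stale actions don't accumulate,
        # then re-raise so the caller still sees the failure.
        remove_from_staging(event_id)
        raise

staged = {"$event1": ["notify"]}

def failing_persist(event_id):
    raise RuntimeError("simulated persistence failure")

try:
    persist_with_staging_cleanup(failing_persist, staged.pop, "$event1")
except RuntimeError:
    pass

assert "$event1" not in staged  # the staged actions were removed
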
diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 34ff9be731..28226455bf 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -782,6 +782,22 @@ class EventPushActionsStore(SQLBaseStore): desc="add_push_actions_to_staging", ) + def remove_push_actions_from_staging(self, event_id): + """Called if we failed to persist the event to ensure that stale push + actions don't build up in the DB + + Args: + event_id (str) + """ + + return self._simple_delete( + table="event_push_actions_staging", + keyvalues={ + "event_id": event_id, + }, + desc="remove_push_actions_from_staging", + ) + def _action_has_highlight(actions): for action in actions: From 3a061cae2626a3aeb680f9100f39c8c7ebf13fef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 16:24:07 +0000 Subject: [PATCH 206/348] Fix unit test --- tests/storage/test_event_push_actions.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 3135488353..d483e7cf9e 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -62,6 +62,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): {"notify_count": noitf_count, "highlight_count": highlight_count} ) + @defer.inlineCallbacks def _inject_actions(stream, action): event = Mock() event.room_id = room_id @@ -69,11 +70,12 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): event.internal_metadata.stream_ordering = stream event.depth = stream - tuples = [(user_id, action)] - - return self.store.runInteraction( + yield self.store.add_push_actions_to_staging( + event.event_id, user_id, action, + ) + yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, - event, tuples + event, ) def _rotate(stream): From 012e8e142a4ca7d87e1ffd66cce44b23bf943e9c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 11:35:01 +0000 Subject: [PATCH 207/348] Comments --- synapse/push/bulk_push_rule_evaluator.py | 3 +++ synapse/storage/event_push_actions.py | 3 ++- synapse/storage/schema/delta/47/push_actions_staging.sql | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1140788aa7..bf4f1c5836 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -189,6 +189,9 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: + # Push rules say we should notify the user of this event, + # so we mark it in the DB in the staging area. (This + # will then get handled when we persist the event) yield self.store.add_push_actions_to_staging( event.event_id, uid, actions, ) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 28226455bf..ea56d4d065 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -762,7 +762,8 @@ class EventPushActionsStore(SQLBaseStore): Args: event_id (str) user_id (str) - actions (list) + actions (list[dict|str]): An action can either be a string or + dict. 
Returns: Deferred diff --git a/synapse/storage/schema/delta/47/push_actions_staging.sql b/synapse/storage/schema/delta/47/push_actions_staging.sql index ec4b1d7d42..edccf4a96f 100644 --- a/synapse/storage/schema/delta/47/push_actions_staging.sql +++ b/synapse/storage/schema/delta/47/push_actions_staging.sql @@ -13,6 +13,10 @@ * limitations under the License. */ +-- Temporary staging area for push actions that have been calculated for an +-- event, but the event hasn't yet been persisted. +-- When the event is persisted the rows are moved over to the +-- event_push_actions table. CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, From 6af025d3c4c19ab8a6f90b667b8c4259606ba47a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 11:35:31 +0000 Subject: [PATCH 208/348] Fix typo of double is_highlight --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index ea56d4d065..f787431b7a 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -769,7 +769,7 @@ class EventPushActionsStore(SQLBaseStore): Deferred """ - is_highlight = is_highlight = 1 if _action_has_highlight(actions) else 0 + is_highlight = 1 if _action_has_highlight(actions) else 0 return self._simple_insert( table="event_push_actions_staging", From d1a3325f9969385ec6cb090f80c61ae576d38e42 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Feb 2018 14:02:31 +0000 Subject: [PATCH 209/348] (Really) fix tablescan of event_push_actions on purge commit 278d21b5 added new code to avoid the tablescan, but didn't remove the old :/ --- synapse/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 52b7b34749..73177e0bc2 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2309,7 +2309,6 @@ class EventsStore(SQLBaseStore): "event_edge_hashes", "event_edges", "event_forward_extremities", - "event_push_actions", "event_reference_hashes", "event_search", "event_signatures", From 3acd616979d59c6ce6a63a4f896390d058bceacc Mon Sep 17 00:00:00 2001 From: Pascal Bach Date: Sun, 18 Feb 2018 11:08:15 +0100 Subject: [PATCH 210/348] Update pynacl dependency to 1.2.1 or higher Signed-off-by: Pascal Bach --- synapse/python_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 97b631e60d..5d65b5fd6e 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -24,7 +24,7 @@ REQUIREMENTS = { "unpaddedbase64>=1.1.0": ["unpaddedbase64>=1.1.0"], "canonicaljson>=1.0.0": ["canonicaljson>=1.0.0"], "signedjson>=1.0.0": ["signedjson>=1.0.0"], - "pynacl==0.3.0": ["nacl==0.3.0", "nacl.bindings"], + "pynacl>=1.2.1": ["nacl>=1.2.1", "nacl.bindings"], "service_identity>=1.0.0": ["service_identity>=1.0.0"], "Twisted>=16.0.0": ["twisted>=16.0.0"], "pyopenssl>=0.14": ["OpenSSL>=0.14"], From 6ff8c87484d13c00fddc87b0bcc3f4cd691c81ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:30:54 +0000 Subject: [PATCH 211/348] Batch inserts into event_push_actions_staging --- synapse/push/bulk_push_rule_evaluator.py | 15 ++++--- synapse/storage/event_push_actions.py | 53 ++++++++++++++++-------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py 
b/synapse/push/bulk_push_rule_evaluator.py index bf4f1c5836..64e9a1da57 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -144,6 +144,7 @@ class BulkPushRuleEvaluator(object): Deferred """ rules_by_user = yield self._get_rules_for_event(event, context) + actions_by_user = {} room_members = yield self.store.get_joined_users_from_context( event, context @@ -189,14 +190,16 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) - yield self.store.add_push_actions_to_staging( - event.event_id, uid, actions, - ) + actions_by_user[uid] = actions break + # Push rules say we should notify the user of this event, + # so we mark it in the DB in the staging area. (This + # will then get handled when we persist the event) + yield self.store.add_push_actions_to_staging( + event.event_id, actions_by_user, + ) + def _condition_checker(evaluator, conditions, uid, display_name, cache): for cond in conditions: diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7a..04e8836e6e 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -755,32 +755,51 @@ class EventPushActionsStore(SQLBaseStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id, actions): - """Add the push actions for the user and event to the push - action staging area. + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. Args: event_id (str) - user_id (str) - actions (list[dict|str]): An action can either be a string or - dict. + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. Returns: Deferred """ - is_highlight = 1 if _action_has_highlight(actions) else 0 + if not user_id_actions: + return - return self._simple_insert( - table="event_push_actions_staging", - values={ - "event_id": event_id, - "user_id": user_id, - "actions": _serialize_action(actions, is_highlight), - "notif": 1, - "highlight": is_highlight, - }, - desc="add_push_actions_to_staging", + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. + def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) 
+ """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn ) def remove_push_actions_from_staging(self, event_id): From d874d4f2d78956973d589bd32aa64d752d14cdc9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:36:56 +0000 Subject: [PATCH 212/348] Delete from push_actions_staging in federation too --- synapse/handlers/federation.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 46bcf8b081..8832ba58bc 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1447,16 +1447,24 @@ class FederationHandler(BaseHandler): auth_events=auth_events, ) - if not event.internal_metadata.is_outlier() and not backfilled: - yield self.action_generator.handle_push_actions_for_event( - event, context - ) + try: + if not event.internal_metadata.is_outlier() and not backfilled: + yield self.action_generator.handle_push_actions_for_event( + event, context + ) - event_stream_id, max_stream_id = yield self.store.persist_event( - event, - context=context, - backfilled=backfilled, - ) + event_stream_id, max_stream_id = yield self.store.persist_event( + event, + context=context, + backfilled=backfilled, + ) + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area + logcontext.preserve_fn( + self.store.remove_push_actions_from_staging + )(event.event_id) + raise if not backfilled: # this intentionally does not yield: we don't care about the result From e440e2845642a27f15d073aea58ce20b2785f66e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 11:41:40 +0000 Subject: [PATCH 213/348] Fix unit tests --- tests/replication/slave/storage/test_events.py | 10 ++++++---- tests/storage/test_event_push_actions.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 4780f2ab72..cb058d3142 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -230,10 +230,12 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): state_handler = self.hs.get_state_handler() context = yield state_handler.compute_event_context(event) - for user_id, actions in push_actions: - yield self.master_store.add_push_actions_to_staging( - event.event_id, user_id, actions, - ) + yield self.master_store.add_push_actions_to_staging( + event.event_id, { + user_id: actions + for user_id, actions in push_actions + }, + ) ordering = None if backfill: diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9e..550f9ba662 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -71,7 +71,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): event.depth = stream yield self.store.add_push_actions_to_staging( - event.event_id, user_id, action, + event.event_id, {user_id: action}, ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, From ad0ccf15ea35603c9453523acb3f43661fdbaa12 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:29:50 +0000 Subject: [PATCH 214/348] Refactor 
_set_push_actions_for_event_and_users_txn to use events_and_contexts --- synapse/storage/event_push_actions.py | 65 +++++++++++++++------------ synapse/storage/events.py | 11 ++--- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7a..dac3505480 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,11 +88,13 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + """Handles moving push actions from staging table to main + event_push_actions table for all events in `events_and_contexts`. + Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) + events_and_contexts (list[(EventBase, EventContext)]): events + we are persisting """ sql = """ @@ -105,34 +107,39 @@ class EventPushActionsStore(SQLBaseStore): WHERE event_id = ? """ - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) + if events_and_contexts: + txn.executemany(sql, ( + ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + ) + for event, _ in events_and_contexts + )) - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) + for event, _ in events_and_contexts: + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", ) + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + + txn.executemany( + "DELETE FROM event_push_actions_staging WHERE event_id = ?", + ( + (event.event_id,) + for event, _ in events_and_contexts + ) + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc2..c8b8abc2e7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1162,16 +1162,17 @@ class EventsStore(SQLBaseStore): backfilled (bool): True if the events were backfilled """ + # Insert all the push actions into the event_push_actions table. + self._set_push_actions_for_event_and_users_txn( + txn, + events_and_contexts=events_and_contexts, + ) + if not events_and_contexts: # nothing to do here return for event, context in events_and_contexts: - # Insert all the push actions into the event_push_actions table. - self._set_push_actions_for_event_and_users_txn( - txn, event, - ) - if event.type == EventTypes.Redaction and event.redacts is not None: # Remove the entries in the event_push_actions table for the # redacted event. 
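(As an illustrative aside, not part of the patches: patches 202-215 move push-action bookkeeping into a staging table, with rows written to event_push_actions_staging while the push rules run and copied into event_push_actions in bulk when the event is persisted. Below is a rough standalone sketch of that flow using an in-memory SQLite database. The table columns follow the schema added in 47/push_actions_staging.sql, but the function names and the simplified highlight check are illustrative assumptions, not Synapse code.)

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE event_push_actions_staging (
        event_id TEXT NOT NULL,
        user_id TEXT NOT NULL,
        actions TEXT NOT NULL,
        notif SMALLINT NOT NULL,
        highlight SMALLINT NOT NULL
    );
    CREATE TABLE event_push_actions (
        room_id TEXT NOT NULL,
        event_id TEXT NOT NULL,
        user_id TEXT NOT NULL,
        actions TEXT NOT NULL,
        stream_ordering BIGINT NOT NULL,
        topological_ordering BIGINT NOT NULL,
        notif SMALLINT NOT NULL,
        highlight SMALLINT NOT NULL
    );
""")

def stage_push_actions(event_id, user_id_actions):
    # Written while the push rules run, before the event is persisted.
    conn.executemany(
        "INSERT INTO event_push_actions_staging"
        " (event_id, user_id, actions, notif, highlight)"
        " VALUES (?, ?, ?, ?, ?)",
        [
            (event_id, uid, ",".join(actions), 1,
             1 if "highlight" in actions else 0)
            for uid, actions in user_id_actions.items()
        ],
    )

def move_staged_actions(event_id, room_id, stream_ordering, depth):
    # At persist time the staged rows are copied across in a single
    # INSERT ... SELECT and then deleted from the staging table.
    conn.execute(
        "INSERT INTO event_push_actions"
        " (room_id, event_id, user_id, actions, stream_ordering,"
        "  topological_ordering, notif, highlight)"
        " SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight"
        " FROM event_push_actions_staging WHERE event_id = ?",
        (room_id, stream_ordering, depth, event_id),
    )
    conn.execute(
        "DELETE FROM event_push_actions_staging WHERE event_id = ?",
        (event_id,),
    )

stage_push_actions("$ev1", {"@alice:example.org": ["notify"]})
move_staged_actions("$ev1", "!room:example.org", stream_ordering=42, depth=7)
print(conn.execute(
    "SELECT user_id, actions, highlight FROM event_push_actions"
).fetchall())
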
From 24087bffa932660cf4482b465b759f7161465f8a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:33:04 +0000 Subject: [PATCH 215/348] Ensure all push actions are deleted from staging --- synapse/storage/event_push_actions.py | 10 +++++++++- synapse/storage/events.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index dac3505480..6a122b05a8 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -88,13 +88,21 @@ class EventPushActionsStore(SQLBaseStore): self._rotate_notifs, 30 * 60 * 1000 ) - def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts): + def _set_push_actions_for_event_and_users_txn(self, txn, events_and_contexts, + all_events_and_contexts): """Handles moving push actions from staging table to main event_push_actions table for all events in `events_and_contexts`. + Also ensures that all events in `all_events_and_contexts` are removed + from the push action staging area. + Args: events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. """ sql = """ diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c8b8abc2e7..7f8561a0c4 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -693,6 +693,8 @@ class EventsStore(SQLBaseStore): list of the event ids which are the forward extremities. """ + all_events_and_contexts = events_and_contexts + max_stream_order = events_and_contexts[-1][0].internal_metadata.stream_ordering self._update_current_state_txn(txn, state_delta_for_room, max_stream_order) @@ -755,6 +757,7 @@ class EventsStore(SQLBaseStore): self._update_metadata_tables_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, backfilled=backfilled, ) @@ -1152,13 +1155,18 @@ class EventsStore(SQLBaseStore): ec for ec in events_and_contexts if ec[0] not in to_remove ] - def _update_metadata_tables_txn(self, txn, events_and_contexts, backfilled): + def _update_metadata_tables_txn(self, txn, events_and_contexts, + all_events_and_contexts, backfilled): """Update all the miscellaneous tables for new events Args: txn (twisted.enterprise.adbapi.Connection): db connection events_and_contexts (list[(EventBase, EventContext)]): events we are persisting + all_events_and_contexts (list[(EventBase, EventContext)]): all + events that we were going to persist. This includes events + we've already persisted, etc, that wouldn't appear in + events_and_context. 
backfilled (bool): True if the events were backfilled """ @@ -1166,6 +1174,7 @@ class EventsStore(SQLBaseStore): self._set_push_actions_for_event_and_users_txn( txn, events_and_contexts=events_and_contexts, + all_events_and_contexts=all_events_and_contexts, ) if not events_and_contexts: From 25634ed1523b88f82edaa73124f3afd057fbb16d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 12:40:44 +0000 Subject: [PATCH 216/348] Fix test --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index d483e7cf9e..dc90e5c243 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -75,7 +75,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): ) yield self.store.runInteraction( "", self.store._set_push_actions_for_event_and_users_txn, - event, + [(event, None)], [(event, None)], ) def _rotate(stream): From f5ac4dc2d46d329e7053259c61ad402269903ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:17:14 +0000 Subject: [PATCH 217/348] Split ReceiptsStore --- synapse/replication/slave/storage/receipts.py | 33 +----- synapse/storage/__init__.py | 3 - synapse/storage/receipts.py | 109 ++++++++++-------- 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index b371574ece..4e845ec041 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +17,7 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.receipts import ReceiptsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.receipts import ReceiptsWorkerStore # So, um, we want to borrow a load of functions intended for reading from # a DataStore, but we don't want to take functions that either write to the @@ -29,36 +28,14 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache # the method descriptor on the DataStore and chuck them into our class. 
-class SlavedReceiptsStore(BaseSlavedStore): +class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedReceiptsStore, self).__init__(db_conn, hs) - - self._receipts_id_gen = SlavedIdTracker( + receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() - ) - - get_receipts_for_user = ReceiptsStore.__dict__["get_receipts_for_user"] - get_linearized_receipts_for_room = ( - ReceiptsStore.__dict__["get_linearized_receipts_for_room"] - ) - _get_linearized_receipts_for_rooms = ( - ReceiptsStore.__dict__["_get_linearized_receipts_for_rooms"] - ) - get_last_receipt_event_id_for_user = ( - ReceiptsStore.__dict__["get_last_receipt_event_id_for_user"] - ) - - get_max_receipt_stream_id = DataStore.get_max_receipt_stream_id.__func__ - get_all_updated_receipts = DataStore.get_all_updated_receipts.__func__ - - get_linearized_receipts_for_rooms = ( - DataStore.get_linearized_receipts_for_rooms.__func__ - ) + super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index f8fbd02ceb..e1c4fe086e 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", "event_stream_ordering")] ) - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) self._account_data_id_gen = StreamIdGenerator( db_conn, "account_data_max_stream_id", "stream_id" ) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 12b3cc7f5f..aa62474a46 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +15,7 @@ # limitations under the License. 
from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList, cached from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -26,9 +28,17 @@ import ujson as json logger = logging.getLogger(__name__) -class ReceiptsStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(ReceiptsStore, self).__init__(db_conn, hs) +class ReceiptsWorkerStore(SQLBaseStore): + def __init__(self, receipts_id_gen, db_conn, hs): + """ + Args: + receipts_id_gen (StreamIdGenerator|SlavedIdTracker) + db_conn: Database connection + hs (Homeserver) + """ + super(ReceiptsWorkerStore, self).__init__(db_conn, hs) + + self._receipts_id_gen = receipts_id_gen self._receipts_stream_cache = StreamChangeCache( "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() @@ -39,26 +49,6 @@ class ReceiptsStore(SQLBaseStore): receipts = yield self.get_receipts_for_room(room_id, "m.read") defer.returnValue(set(r['user_id'] for r in receipts)) - def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, - user_id): - if receipt_type != "m.read": - return - - # Returns an ObservableDeferred - res = self.get_users_with_read_receipts_in_room.cache.get( - room_id, None, update_metrics=False, - ) - - if res: - if isinstance(res, defer.Deferred) and res.called: - res = res.result - if user_id in res: - # We'd only be adding to the set, so no point invalidating if the - # user is already there - return - - self.get_users_with_read_receipts_in_room.invalidate((room_id,)) - @cached(num_args=2) def get_receipts_for_room(self, room_id, receipt_type): return self._simple_select_list( @@ -273,6 +263,57 @@ class ReceiptsStore(SQLBaseStore): def get_max_receipt_stream_id(self): return self._receipts_id_gen.get_current_token() + def get_all_updated_receipts(self, last_id, current_id, limit=None): + if last_id == current_id: + return defer.succeed([]) + + def get_all_updated_receipts_txn(txn): + sql = ( + "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" + " FROM receipts_linearized" + " WHERE ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + ) + args = [last_id, current_id] + if limit is not None: + sql += " LIMIT ?" 
+ args.append(limit) + txn.execute(sql, args) + + return txn.fetchall() + return self.runInteraction( + "get_all_updated_receipts", get_all_updated_receipts_txn + ) + + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + + def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, + user_id): + if receipt_type != "m.read": + return + + # Returns an ObservableDeferred + res = self.get_users_with_read_receipts_in_room.cache.get( + room_id, None, update_metrics=False, + ) + + if res: + if isinstance(res, defer.Deferred) and res.called: + res = res.result + if user_id in res: + # We'd only be adding to the set, so no point invalidating if the + # user is already there + return + + self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( @@ -457,25 +498,3 @@ class ReceiptsStore(SQLBaseStore): "data": json.dumps(data), } ) - - def get_all_updated_receipts(self, last_id, current_id, limit=None): - if last_id == current_id: - return defer.succeed([]) - - def get_all_updated_receipts_txn(txn): - sql = ( - "SELECT stream_id, room_id, receipt_type, user_id, event_id, data" - " FROM receipts_linearized" - " WHERE ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - ) - args = [last_id, current_id] - if limit is not None: - sql += " LIMIT ?" - args.append(limit) - txn.execute(sql, args) - - return txn.fetchall() - return self.runInteraction( - "get_all_updated_receipts", get_all_updated_receipts_txn - ) From e316bbb4c07cad97c4cff5bc0c5b0dc2cd7bc519 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:33:18 +0000 Subject: [PATCH 218/348] Use abstract base class to access stream IDs --- synapse/replication/slave/storage/receipts.py | 9 +++- synapse/storage/receipts.py | 42 ++++++++++++------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index 4e845ec041..a2eb4a02db 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,11 +31,16 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - receipts_id_gen = SlavedIdTracker( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" ) - super(SlavedReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(SlavedReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def stream_positions(self): result = super(SlavedReceiptsStore, self).stream_positions() diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index aa62474a46..b11cf7ff62 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,6 +21,7 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer +import abc import logging import ujson as json @@ -29,21 +30,30 @@ logger = logging.getLogger(__name__) class ReceiptsWorkerStore(SQLBaseStore): - 
def __init__(self, receipts_id_gen, db_conn, hs): - """ - Args: - receipts_id_gen (StreamIdGenerator|SlavedIdTracker) - db_conn: Database connection - hs (Homeserver) - """ + """This is an abstract base class where subclasses must implement + `get_max_receipt_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): super(ReceiptsWorkerStore, self).__init__(db_conn, hs) - self._receipts_id_gen = receipts_id_gen - self._receipts_stream_cache = StreamChangeCache( - "ReceiptsRoomChangeCache", self._receipts_id_gen.get_current_token() + "ReceiptsRoomChangeCache", self.get_max_receipt_stream_id() ) + @abc.abstractmethod + def get_max_receipt_stream_id(self): + """Get the current max stream ID for receipts stream + + Returns: + int + """ + pass + @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): receipts = yield self.get_receipts_for_room(room_id, "m.read") @@ -260,9 +270,6 @@ class ReceiptsWorkerStore(SQLBaseStore): } defer.returnValue(results) - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def get_all_updated_receipts(self, last_id, current_id, limit=None): if last_id == current_id: return defer.succeed([]) @@ -288,11 +295,16 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - receipts_id_gen = StreamIdGenerator( + # We instansiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" ) - super(ReceiptsStore, self).__init__(receipts_id_gen, db_conn, hs) + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, user_id): From 95e4cffd859b0fc3fcd54c755fcc0da403f97b94 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:58:40 +0000 Subject: [PATCH 219/348] Fix comment --- synapse/replication/slave/storage/receipts.py | 2 +- synapse/storage/receipts.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index a2eb4a02db..f0e29e9836 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -31,7 +31,7 @@ from synapse.storage.receipts import ReceiptsWorkerStore class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call get_max_receipt_stream_id self._receipts_id_gen = SlavedIdTracker( db_conn, "receipts_linearized", "stream_id" diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index b11cf7ff62..c2a6613a62 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -295,7 +295,7 @@ class ReceiptsWorkerStore(SQLBaseStore): class ReceiptsStore(ReceiptsWorkerStore): def __init__(self, db_conn, hs): - # We instansiate this first as the ReceiptsWorkerStore constructor + # We instantiate this first as the ReceiptsWorkerStore constructor # needs to be able to call 
get_max_receipt_stream_id self._receipts_id_gen = StreamIdGenerator( db_conn, "receipts_linearized", "stream_id" From 8fbb4d0d19031d8cd3742285fb9b36c3bdfc52a0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Feb 2018 17:59:23 +0000 Subject: [PATCH 220/348] Raise exception in abstract method --- synapse/storage/receipts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c2a6613a62..40530632c6 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -52,7 +52,7 @@ class ReceiptsWorkerStore(SQLBaseStore): Returns: int """ - pass + raise NotImplementedError() @cachedInlineCallbacks() def get_users_with_read_receipts_in_room(self, room_id): From ca9b9d970374766f7b53dc5b1762cfa1b0e1a5dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 16 Feb 2018 12:08:42 +0000 Subject: [PATCH 221/348] Split AccountDataStore and TagStore --- .../replication/slave/storage/account_data.py | 38 +--------- synapse/storage/__init__.py | 8 -- synapse/storage/account_data.py | 75 +++++++++++++++---- synapse/storage/tags.py | 15 ++-- 4 files changed, 69 insertions(+), 67 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index efbd87918e..6c95261aa5 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -15,48 +15,18 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.account_data import AccountDataStore -from synapse.storage.tags import TagsStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.account_data import AccountDataWorkerStore +from synapse.storage.tags import TagsWorkerStore -class SlavedAccountDataStore(BaseSlavedStore): +class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedAccountDataStore, self).__init__(db_conn, hs) self._account_data_id_gen = SlavedIdTracker( db_conn, "account_data_max_stream_id", "stream_id", ) - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", - self._account_data_id_gen.get_current_token(), - ) - get_account_data_for_user = ( - AccountDataStore.__dict__["get_account_data_for_user"] - ) - - get_global_account_data_by_type_for_users = ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_users"] - ) - - get_global_account_data_by_type_for_user = ( - AccountDataStore.__dict__["get_global_account_data_by_type_for_user"] - ) - - get_tags_for_user = TagsStore.__dict__["get_tags_for_user"] - get_tags_for_room = ( - DataStore.get_tags_for_room.__func__ - ) - get_account_data_for_room = ( - DataStore.get_account_data_for_room.__func__ - ) - - get_updated_tags = DataStore.get_updated_tags.__func__ - get_updated_account_data_for_user = ( - DataStore.get_updated_account_data_for_user.__func__ - ) + super(SlavedAccountDataStore, self).__init__(db_conn, hs) def get_max_account_data_stream_id(self): return self._account_data_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086e..e221284ee2 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -104,9 +104,6 @@ class DataStore(RoomMemberStore, RoomStore, db_conn, "events", "stream_ordering", step=-1, extra_tables=[("ex_outlier_stream", 
"event_stream_ordering")] ) - self._account_data_id_gen = StreamIdGenerator( - db_conn, "account_data_max_stream_id", "stream_id" - ) self._presence_id_gen = StreamIdGenerator( db_conn, "presence_stream", "stream_id" ) @@ -159,11 +156,6 @@ class DataStore(RoomMemberStore, RoomStore, "MembershipStreamChangeCache", events_max, ) - account_max = self._account_data_id_gen.get_current_token() - self._account_data_stream_cache = StreamChangeCache( - "AccountDataAndTagsChangeCache", account_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 56a0bde549..66fed4bdcf 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -13,18 +13,46 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore from twisted.internet import defer +from ._base import SQLBaseStore +from .util.id_generators import StreamIdGenerator + +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +import abc import ujson as json import logging logger = logging.getLogger(__name__) -class AccountDataStore(SQLBaseStore): +class AccountDataWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_max_account_data_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + account_max = self.get_max_account_data_stream_id() + self._account_data_stream_cache = StreamChangeCache( + "AccountDataAndTagsChangeCache", account_max, + ) + + super(AccountDataWorkerStore, self).__init__(db_conn, hs) + + @abc.abstractmethod + def get_max_account_data_stream_id(self): + """Get the current max stream ID for account data stream + + Returns: + int + """ + raise NotImplementedError() @cached() def get_account_data_for_user(self, user_id): @@ -209,6 +237,36 @@ class AccountDataStore(SQLBaseStore): "get_updated_account_data_for_user", get_updated_account_data_for_user_txn ) + @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) + def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): + ignored_account_data = yield self.get_global_account_data_by_type_for_user( + "m.ignored_user_list", ignorer_user_id, + on_invalidate=cache_context.invalidate, + ) + if not ignored_account_data: + defer.returnValue(False) + + defer.returnValue( + ignored_user_id in ignored_account_data.get("ignored_users", {}) + ) + + +class AccountDataStore(AccountDataWorkerStore): + def __init__(self, db_conn, hs): + self._account_data_id_gen = StreamIdGenerator( + db_conn, "account_data_max_stream_id", "stream_id" + ) + + super(AccountDataStore, self).__init__(db_conn, hs) + + def get_max_account_data_stream_id(self): + """Get the current max stream id for the private user data stream + + Returns: + A deferred int. + """ + return self._account_data_id_gen.get_current_token() + @defer.inlineCallbacks def add_account_data_to_room(self, user_id, room_id, account_data_type, content): """Add some account_data to a room for a user. 
@@ -321,16 +379,3 @@ class AccountDataStore(SQLBaseStore): "update_account_data_max_stream_id", _update, ) - - @cachedInlineCallbacks(num_args=2, cache_context=True, max_entries=5000) - def is_ignored_by(self, ignored_user_id, ignorer_user_id, cache_context): - ignored_account_data = yield self.get_global_account_data_by_type_for_user( - "m.ignored_user_list", ignorer_user_id, - on_invalidate=cache_context.invalidate, - ) - if not ignored_account_data: - defer.returnValue(False) - - defer.returnValue( - ignored_user_id in ignored_account_data.get("ignored_users", {}) - ) diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index bff73f3f04..484d66991a 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from .account_data import AccountDataWorkerStore + from synapse.util.caches.descriptors import cached from twisted.internet import defer @@ -23,15 +24,7 @@ import logging logger = logging.getLogger(__name__) -class TagsStore(SQLBaseStore): - def get_max_account_data_stream_id(self): - """Get the current max stream id for the private user data stream - - Returns: - A deferred int. - """ - return self._account_data_id_gen.get_current_token() - +class TagsWorkerStore(AccountDataWorkerStore): @cached() def get_tags_for_user(self, user_id): """Get all the tags for a user. @@ -170,6 +163,8 @@ class TagsStore(SQLBaseStore): row["tag"]: json.loads(row["content"]) for row in rows }) + +class TagsStore(TagsWorkerStore): @defer.inlineCallbacks def add_tag_to_room(self, user_id, room_id, tag, content): """Add a tag to a room for a user. From cbaad969f951c2db067ced709154151a7dfaf6f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:39:27 +0000 Subject: [PATCH 222/348] Split PushRulesStore --- .../replication/slave/storage/push_rule.py | 23 ++---- synapse/storage/__init__.py | 12 ---- synapse/storage/push_rule.py | 71 ++++++++++++++----- 3 files changed, 61 insertions(+), 45 deletions(-) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 83e880fdd2..0e3d9a87dc 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -15,29 +15,15 @@ from .events import SlavedEventStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore -from synapse.storage.push_rule import PushRuleStore -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.storage.push_rule import PushRulesWorkerStore -class SlavedPushRuleStore(SlavedEventStore): +class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): def __init__(self, db_conn, hs): - super(SlavedPushRuleStore, self).__init__(db_conn, hs) self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", ) - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", - self._push_rules_stream_id_gen.get_current_token(), - ) - - get_push_rules_for_user = PushRuleStore.__dict__["get_push_rules_for_user"] - get_push_rules_enabled_for_user = ( - PushRuleStore.__dict__["get_push_rules_enabled_for_user"] - ) - have_push_rules_changed_for_user = ( - DataStore.have_push_rules_changed_for_user.__func__ - ) + super(SlavedPushRuleStore, self).__init__(db_conn, hs) def get_push_rules_stream_token(self): return ( @@ -45,6 +31,9 @@ class 
SlavedPushRuleStore(SlavedEventStore): self._stream_id_gen.get_current_token(), ) + def get_max_push_rules_stream_id(self): + return self._push_rules_stream_id_gen.get_current_token() + def stream_positions(self): result = super(SlavedPushRuleStore, self).stream_positions() result["push_rules"] = self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index e1c4fe086e..cd2759858c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -177,18 +177,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=presence_cache_prefill ) - push_rules_prefill, push_rules_id = self._get_cache_dict( - db_conn, "push_rules_stream", - entity_column="user_id", - stream_column="stream_id", - max_value=self._push_rules_stream_id_gen.get_current_token()[0], - ) - - self.push_rules_stream_cache = StreamChangeCache( - "PushRulesStreamChangeCache", push_rules_id, - prefilled_cache=push_rules_prefill, - ) - max_device_inbox_id = self._device_inbox_id_gen.get_current_token() device_inbox_prefill, min_device_inbox_id = self._get_cache_dict( db_conn, "device_inbox", diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 8758b1c0c7..b35bd7a644 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,10 +15,12 @@ from ._base import SQLBaseStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules from synapse.api.constants import EventTypes from twisted.internet import defer +import abc import logging import simplejson as json @@ -48,7 +50,39 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRuleStore(SQLBaseStore): +class PushRulesWorkerStore(SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_max_push_rules_stream_id` which can be called in the initializer. + """ + + # This ABCMeta metaclass ensures that we cannot be instantiated without + # the abstract methods being implemented. + __metaclass__ = abc.ABCMeta + + def __init__(self, db_conn, hs): + super(PushRulesWorkerStore, self).__init__(db_conn, hs) + + push_rules_prefill, push_rules_id = self._get_cache_dict( + db_conn, "push_rules_stream", + entity_column="user_id", + stream_column="stream_id", + max_value=self.get_max_push_rules_stream_id(), + ) + + self.push_rules_stream_cache = StreamChangeCache( + "PushRulesStreamChangeCache", push_rules_id, + prefilled_cache=push_rules_prefill, + ) + + @abc.abstractmethod + def get_max_push_rules_stream_id(self): + """Get the position of the push rules stream. + + Returns: + int + """ + raise NotImplementedError() + @cachedInlineCallbacks(max_entries=5000) def get_push_rules_for_user(self, user_id): rows = yield self._simple_select_list( @@ -89,6 +123,24 @@ class PushRuleStore(SQLBaseStore): r['rule_id']: False if r['enabled'] == 0 else True for r in results }) + def have_push_rules_changed_for_user(self, user_id, last_id): + if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): + return defer.succeed(False) + else: + def have_push_rules_changed_txn(txn): + sql = ( + "SELECT COUNT(stream_id) FROM push_rules_stream" + " WHERE user_id = ? AND ? 
< stream_id" + ) + txn.execute(sql, (user_id, last_id)) + count, = txn.fetchone() + return bool(count) + return self.runInteraction( + "have_push_rules_changed", have_push_rules_changed_txn + ) + + +class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -526,21 +578,8 @@ class PushRuleStore(SQLBaseStore): room stream ordering it corresponds to.""" return self._push_rules_stream_id_gen.get_current_token() - def have_push_rules_changed_for_user(self, user_id, last_id): - if not self.push_rules_stream_cache.has_entity_changed(user_id, last_id): - return defer.succeed(False) - else: - def have_push_rules_changed_txn(txn): - sql = ( - "SELECT COUNT(stream_id) FROM push_rules_stream" - " WHERE user_id = ? AND ? < stream_id" - ) - txn.execute(sql, (user_id, last_id)) - count, = txn.fetchone() - return bool(count) - return self.runInteraction( - "have_push_rules_changed", have_push_rules_changed_txn - ) + def get_max_push_rules_stream_id(self): + return self.get_push_rules_stream_token()[0] class RuleNotFoundException(Exception): From 6f727653717d9fc32f5676f4d991fbbe85c1c812 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 10:54:18 +0000 Subject: [PATCH 223/348] Split PusherStore --- synapse/replication/slave/storage/pushers.py | 11 ++--------- synapse/storage/pusher.py | 10 ++++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index 4e8d68ece9..e352c3235d 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -16,10 +16,10 @@ from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker -from synapse.storage import DataStore +from synapse.storage.pusher import PusherWorkerStore -class SlavedPusherStore(BaseSlavedStore): +class SlavedPusherStore(PusherWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedPusherStore, self).__init__(db_conn, hs) @@ -28,13 +28,6 @@ class SlavedPusherStore(BaseSlavedStore): extra_tables=[("deleted_pushers", "stream_id")], ) - get_all_pushers = DataStore.get_all_pushers.__func__ - get_pushers_by = DataStore.get_pushers_by.__func__ - get_pushers_by_app_id_and_pushkey = ( - DataStore.get_pushers_by_app_id_and_pushkey.__func__ - ) - _decode_pushers_rows = DataStore._decode_pushers_rows.__func__ - def stream_positions(self): result = super(SlavedPusherStore, self).stream_positions() result["pushers"] = self._pushers_id_gen.get_current_token() diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 3d8b4d5d5b..b0159c70c0 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -27,7 +27,7 @@ import types logger = logging.getLogger(__name__) -class PusherStore(SQLBaseStore): +class PusherWorkerStore(SQLBaseStore): def _decode_pushers_rows(self, rows): for r in rows: dataJson = r['data'] @@ -102,9 +102,6 @@ class PusherStore(SQLBaseStore): rows = yield self.runInteraction("get_all_pushers", get_pushers) defer.returnValue(rows) - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - def get_all_updated_pushers(self, last_id, current_id, limit): if last_id == current_id: return defer.succeed(([], [])) @@ -177,6 +174,11 @@ class PusherStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) + +class 
PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator From d15d237b0de69d9e8ea533060df9a8bb844b9a4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:01:13 +0000 Subject: [PATCH 224/348] Split out EventPushActionWorkerStore --- synapse/replication/slave/storage/events.py | 23 +--- synapse/storage/event_push_actions.py | 144 ++++++++++---------- 2 files changed, 76 insertions(+), 91 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48b..c5d6c6bd86 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,7 +17,7 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore -from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +38,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventPushActionsWorkerStore, StateGroupWorkerStore, + BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -80,30 +81,12 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_invited_rooms_for_user = RoomMemberStore.__dict__[ "get_invited_rooms_for_user" ] - get_unread_event_push_actions_by_room_for_user = ( - EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] - ) - _get_unread_counts_by_receipt_txn = ( - DataStore._get_unread_counts_by_receipt_txn.__func__ - ) - _get_unread_counts_by_pos_txn = ( - DataStore._get_unread_counts_by_pos_txn.__func__ - ) get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ - get_unread_push_actions_for_user_in_range_for_http = ( - DataStore.get_unread_push_actions_for_user_in_range_for_http.__func__ - ) - get_unread_push_actions_for_user_in_range_for_email = ( - DataStore.get_unread_push_actions_for_user_in_range_for_email.__func__ - ) - get_push_action_users_in_range = ( - DataStore.get_push_action_users_in_range.__func__ - ) get_event = DataStore.get_event.__func__ get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index f787431b7a..124583835d 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -62,77 +62,7 @@ def _deserialize_action(actions, is_highlight): return DEFAULT_NOTIF_ACTION -class EventPushActionsStore(SQLBaseStore): - EPA_HIGHLIGHT_INDEX = "epa_highlight_index" - - def __init__(self, db_conn, hs): - super(EventPushActionsStore, self).__init__(db_conn, hs) - - self.register_background_index_update( - self.EPA_HIGHLIGHT_INDEX, - 
index_name="event_push_actions_u_highlight", - table="event_push_actions", - columns=["user_id", "stream_ordering"], - ) - - self.register_background_index_update( - "event_push_actions_highlights_index", - index_name="event_push_actions_highlights_index", - table="event_push_actions", - columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], - where_clause="highlight=1" - ) - - self._doing_notif_rotation = False - self._rotate_notif_loop = self._clock.looping_call( - self._rotate_notifs, 30 * 60 * 1000 - ) - - def _set_push_actions_for_event_and_users_txn(self, txn, event): - """ - Args: - event: the event set actions for - tuples: list of tuples of (user_id, actions) - """ - - sql = """ - INSERT INTO event_push_actions ( - room_id, event_id, user_id, actions, stream_ordering, - topological_ordering, notif, highlight - ) - SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight - FROM event_push_actions_staging - WHERE event_id = ? - """ - - txn.execute(sql, ( - event.room_id, event.internal_metadata.stream_ordering, - event.depth, event.event_id, - )) - - user_ids = self._simple_select_onecol_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - retcol="user_id", - ) - - self._simple_delete_txn( - txn, - table="event_push_actions_staging", - keyvalues={ - "event_id": event.event_id, - }, - ) - - for uid in user_ids: - txn.call_after( - self.get_unread_event_push_actions_by_room_for_user.invalidate_many, - (event.room_id, uid,) - ) - +class EventPushActionsWorkerStore(SQLBaseStore): @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -449,6 +379,78 @@ class EventPushActionsStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + +class EventPushActionsStore(EventPushActionsWorkerStore): + EPA_HIGHLIGHT_INDEX = "epa_highlight_index" + + def __init__(self, db_conn, hs): + super(EventPushActionsStore, self).__init__(db_conn, hs) + + self.register_background_index_update( + self.EPA_HIGHLIGHT_INDEX, + index_name="event_push_actions_u_highlight", + table="event_push_actions", + columns=["user_id", "stream_ordering"], + ) + + self.register_background_index_update( + "event_push_actions_highlights_index", + index_name="event_push_actions_highlights_index", + table="event_push_actions", + columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], + where_clause="highlight=1" + ) + + self._doing_notif_rotation = False + self._rotate_notif_loop = self._clock.looping_call( + self._rotate_notifs, 30 * 60 * 1000 + ) + + def _set_push_actions_for_event_and_users_txn(self, txn, event): + """ + Args: + event: the event set actions for + tuples: list of tuples of (user_id, actions) + """ + + sql = """ + INSERT INTO event_push_actions ( + room_id, event_id, user_id, actions, stream_ordering, + topological_ordering, notif, highlight + ) + SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight + FROM event_push_actions_staging + WHERE event_id = ? 
+ """ + + txn.execute(sql, ( + event.room_id, event.internal_metadata.stream_ordering, + event.depth, event.event_id, + )) + + user_ids = self._simple_select_onecol_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + retcol="user_id", + ) + + self._simple_delete_txn( + txn, + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + }, + ) + + for uid in user_ids: + txn.call_after( + self.get_unread_event_push_actions_by_room_for_user.invalidate_many, + (event.room_id, uid,) + ) + @defer.inlineCallbacks def get_push_actions_for_user(self, user_id, before=None, limit=50, only_highlight=False): From c96d547f4dd3c10d3c4be66c40cb07232a16a987 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:03:42 +0000 Subject: [PATCH 225/348] Actually use new param --- synapse/storage/event_push_actions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 6a122b05a8..214ace27c9 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -140,11 +140,13 @@ class EventPushActionsStore(SQLBaseStore): (event.room_id, uid,) ) + # Now we delete the staging area for *all* events that were being + # persisted. txn.executemany( "DELETE FROM event_push_actions_staging WHERE event_id = ?", ( (event.event_id,) - for event, _ in events_and_contexts + for event, _ in all_events_and_contexts ) ) From 573712da6b720cb808e61e4cbd5426e85e58a161 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:29:49 +0000 Subject: [PATCH 226/348] Update comments --- synapse/push/bulk_push_rule_evaluator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 64e9a1da57..7c680659b6 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -190,12 +190,13 @@ class BulkPushRuleEvaluator(object): if matches: actions = [x for x in rule['actions'] if x != 'dont_notify'] if actions and 'notify' in actions: + # Push rules say we should notify the user of this event actions_by_user[uid] = actions break - # Push rules say we should notify the user of this event, - # so we mark it in the DB in the staging area. (This - # will then get handled when we persist the event) + # Mark in the DB staging area the push actions for users who should be + # notified for this event. 
(This will then get handled when we persist + # the event) yield self.store.add_push_actions_to_staging( event.event_id, actions_by_user, ) From 27b094f382a33b7d69eb592951a463c3e53af5b4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:41:48 +0000 Subject: [PATCH 227/348] Split out get_events and co into a worker store --- synapse/replication/slave/storage/events.py | 13 +- synapse/storage/events.py | 695 ++++++++++---------- 2 files changed, 352 insertions(+), 356 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f8c164b48b..74a81a0a51 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -18,6 +18,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore @@ -38,7 +39,7 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -104,8 +105,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_event = DataStore.get_event.__func__ - get_events = DataStore.get_events.__func__ get_rooms_for_user_where_membership_is = ( DataStore.get_rooms_for_user_where_membership_is.__func__ ) @@ -135,14 +134,6 @@ class SlavedEventStore(StateGroupWorkerStore, BaseSlavedStore): _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_events = DataStore._get_events.__func__ - _get_events_from_cache = DataStore._get_events_from_cache.__func__ - - _invalidate_get_event_cache = DataStore._invalidate_get_event_cache.__func__ - _enqueue_events = DataStore._enqueue_events.__func__ - _do_fetch = DataStore._do_fetch.__func__ - _fetch_event_rows = DataStore._fetch_event_rows.__func__ - _get_event_from_row = DataStore._get_event_from_row.__func__ _get_rooms_for_user_where_membership_is_txn = ( DataStore._get_rooms_for_user_where_membership_is_txn.__func__ ) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc2..681a33314d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -199,7 +199,356 @@ def _retry_on_integrity_error(func): return f -class EventsStore(SQLBaseStore): +class EventsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventsWorkerStore, self).__init__(db_conn, hs) + + self._event_persist_queue = _EventPeristenceQueue() + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. + + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. 
+ allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. + """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to teturn events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in cache. 
If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. + """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. 
+ """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. 
+ + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) + + +class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" @@ -234,8 +583,6 @@ class EventsStore(SQLBaseStore): psql_only=True, ) - self._event_persist_queue = _EventPeristenceQueue() - self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): @@ -609,62 +956,6 @@ class EventsStore(SQLBaseStore): defer.returnValue((to_delete, to_insert)) - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. 
- """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - @log_function def _persist_events_txn(self, txn, events_and_contexts, backfilled, delete_existing=False, state_delta_for_room={}, @@ -1375,292 +1666,6 @@ class EventsStore(SQLBaseStore): "have_events", f, ) - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. 
- """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - - txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = 
json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - @defer.inlineCallbacks def count_daily_messages(self): """ From 46244b27591f8674364ddefa9ae62cecb161fea3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 11:20:42 +0000 Subject: [PATCH 228/348] Split AS stores --- .../replication/slave/storage/appservice.py | 34 ++++--------------- synapse/storage/appservice.py | 26 +++++++++----- 2 files changed, 23 insertions(+), 37 deletions(-) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 0d3f31a50c..3b9ded0098 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -13,33 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.config.appservice import load_appservices -from synapse.storage.appservice import _make_exclusive_regex +from synapse.storage.appservice import ( + ApplicationServiceWorkerStore, ApplicationServiceTransactionWorkerStore, +) -class SlavedApplicationServiceStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedApplicationServiceStore, self).__init__(db_conn, hs) - self.services_cache = load_appservices( - hs.config.server_name, - hs.config.app_service_config_files - ) - self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) - - get_app_service_by_token = DataStore.get_app_service_by_token.__func__ - get_app_service_by_user_id = DataStore.get_app_service_by_user_id.__func__ - get_app_services = DataStore.get_app_services.__func__ - get_new_events_for_appservice = DataStore.get_new_events_for_appservice.__func__ - create_appservice_txn = DataStore.create_appservice_txn.__func__ - get_appservices_by_state = DataStore.get_appservices_by_state.__func__ - get_oldest_unsent_txn = DataStore.get_oldest_unsent_txn.__func__ - _get_last_txn = DataStore._get_last_txn.__func__ - complete_appservice_txn = DataStore.complete_appservice_txn.__func__ - get_appservice_state = DataStore.get_appservice_state.__func__ - set_appservice_last_pos = DataStore.set_appservice_last_pos.__func__ - set_appservice_state = DataStore.set_appservice_state.__func__ - get_if_app_services_interested_in_user = ( - DataStore.get_if_app_services_interested_in_user.__func__ - ) +class SlavedApplicationServiceStore(ApplicationServiceTransactionWorkerStore, + ApplicationServiceWorkerStore): + pass diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 79673b4273..f66cc98091 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -20,6 +20,7 @@ from twisted.internet import defer from synapse.api.constants import Membership from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices +from synapse.storage.events import EventsWorkerStore from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -46,17 +47,16 @@ def _make_exclusive_regex(services_cache): return exclusive_user_regex -class ApplicationServiceStore(SQLBaseStore): - +class ApplicationServiceWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname self.services_cache = load_appservices( hs.hostname, hs.config.app_service_config_files ) self.exclusive_user_regex = _make_exclusive_regex(self.services_cache) + super(ApplicationServiceWorkerStore, self).__init__(db_conn, hs) + def get_app_services(self): return self.services_cache @@ -112,6 +112,13 @@ class ApplicationServiceStore(SQLBaseStore): return service return None + +class ApplicationServiceStore(ApplicationServiceWorkerStore): + + def __init__(self, db_conn, hs): + super(ApplicationServiceStore, self).__init__(db_conn, hs) + self.hostname = hs.hostname + def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. 
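The slaved application service store is the clearest illustration of what the split buys: instead of re-binding functions pulled out of DataStore by hand (the `DataStore.foo.__func__` assignments deleted here; the `SomeStore.__dict__["bar"]` form is the sibling idiom used elsewhere in this series for @cached methods), a worker process now simply inherits the relevant worker store. A toy before-and-after sketch, with hypothetical names, could look like:

class AppServiceWorkerStoreSketch(object):
    """Read-only app-service lookups, shared by master and worker processes."""

    def get_app_services(self):
        return getattr(self, "services_cache", [])


# Before this series: copy raw functions across classes and hope nothing drifts.
class OldSlavedStoreSketch(object):
    get_app_services = AppServiceWorkerStoreSketch.__dict__["get_app_services"]


# After: ordinary inheritance keeps the method with the class that defines it.
class SlavedStoreSketch(AppServiceWorkerStoreSketch):
    pass


print(SlavedStoreSketch().get_app_services())  # prints []

Inheritance also composes: ApplicationServiceTransactionWorkerStore above extends both ApplicationServiceWorkerStore and EventsWorkerStore, so a slaved store that uses it picks up `_get_events` without any hand-copied methods.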
@@ -184,11 +191,8 @@ class ApplicationServiceStore(SQLBaseStore): return rooms_for_user_matching_user_id -class ApplicationServiceTransactionStore(SQLBaseStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceTransactionStore, self).__init__(db_conn, hs) - +class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, + EventsWorkerStore): @defer.inlineCallbacks def get_appservices_by_state(self, state): """Get a list of application services based on their state. @@ -433,3 +437,7 @@ class ApplicationServiceTransactionStore(SQLBaseStore): events = yield self._get_events(event_ids) defer.returnValue((upper_bound, events)) + + +class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + pass From 3dec9c66b3c2af1bf5b7283d7db443b65ebbd8a4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 12:01:08 +0000 Subject: [PATCH 229/348] Split out RoomMemberStore --- synapse/replication/slave/storage/events.py | 30 +- synapse/storage/roommember.py | 360 ++++++++++---------- 2 files changed, 184 insertions(+), 206 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a51..0dc87aee7f 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomMemberStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore from synapse.storage.signatures import SignatureStore @@ -39,7 +39,8 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. -class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): +class SlavedEventStore(RoomMemberWorkerStore, EventsWorkerStore, + StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedEventStore, self).__init__(db_conn, hs) @@ -69,18 +70,9 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. 
- get_rooms_for_user = RoomMemberStore.__dict__["get_rooms_for_user"] - get_users_in_room = RoomMemberStore.__dict__["get_users_in_room"] - get_hosts_in_room = RoomMemberStore.__dict__["get_hosts_in_room"] - get_users_who_share_room_with_user = ( - RoomMemberStore.__dict__["get_users_who_share_room_with_user"] - ) get_latest_event_ids_in_room = EventFederationStore.__dict__[ "get_latest_event_ids_in_room" ] - get_invited_rooms_for_user = RoomMemberStore.__dict__[ - "get_invited_rooms_for_user" - ] get_unread_event_push_actions_by_room_for_user = ( EventPushActionsStore.__dict__["get_unread_event_push_actions_by_room_for_user"] ) @@ -93,7 +85,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] ) - _get_joined_hosts_cache = RoomMemberStore.__dict__["_get_joined_hosts_cache"] has_room_changed_since = DataStore.has_room_changed_since.__func__ get_unread_push_actions_for_user_in_range_for_http = ( @@ -105,9 +96,6 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore get_push_action_users_in_range = ( DataStore.get_push_action_users_in_range.__func__ ) - get_rooms_for_user_where_membership_is = ( - DataStore.get_rooms_for_user_where_membership_is.__func__ - ) get_membership_changes_for_user = ( DataStore.get_membership_changes_for_user.__func__ ) @@ -116,27 +104,15 @@ class SlavedEventStore(EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore DataStore.get_room_events_stream_for_room.__func__ ) get_events_around = DataStore.get_events_around.__func__ - get_joined_users_from_state = DataStore.get_joined_users_from_state.__func__ - get_joined_users_from_context = DataStore.get_joined_users_from_context.__func__ - _get_joined_users_from_context = ( - RoomMemberStore.__dict__["_get_joined_users_from_context"] - ) - - get_joined_hosts = DataStore.get_joined_hosts.__func__ - _get_joined_hosts = RoomMemberStore.__dict__["_get_joined_hosts"] get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ get_room_events_stream_for_rooms = ( DataStore.get_room_events_stream_for_rooms.__func__ ) - is_host_joined = RoomMemberStore.__dict__["is_host_joined"] get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ _set_before_and_after = staticmethod(DataStore._set_before_and_after) - _get_rooms_for_user_where_membership_is_txn = ( - DataStore._get_rooms_for_user_where_membership_is_txn.__func__ - ) _get_events_around_txn = DataStore._get_events_around_txn.__func__ get_backfill_events = DataStore.get_backfill_events.__func__ diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3e77fd3901..6574fe74b4 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -17,7 +17,7 @@ from twisted.internet import defer from collections import namedtuple -from ._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore from synapse.util.async import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -48,97 +48,7 @@ ProfileInfo = namedtuple( _MEMBERSHIP_PROFILE_UPDATE_NAME = "room_membership_profile_update" -class RoomMemberStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(RoomMemberStore, self).__init__(db_conn, hs) - self.register_background_update_handler( - _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile - ) - - def _store_room_members_txn(self, 
txn, events, backfilled): - """Store a room member in the database. - """ - self._simple_insert_many_txn( - txn, - table="room_memberships", - values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": event.content.get("displayname", None), - "avatar_url": event.content.get("avatar_url", None), - } - for event in events - ] - ) - - for event in events: - txn.call_after( - self._membership_stream_cache.entity_has_changed, - event.state_key, event.internal_metadata.stream_ordering - ) - txn.call_after( - self.get_invited_rooms_for_user.invalidate, (event.state_key,) - ) - - # We update the local_invites table only if the event is "current", - # i.e., its something that has just happened. - # The only current event that can also be an outlier is if its an - # invite that has come in across federation. - is_new_state = not backfilled and ( - not event.internal_metadata.is_outlier() - or event.internal_metadata.is_invite_from_remote() - ) - is_mine = self.hs.is_mine_id(event.state_key) - if is_new_state and is_mine: - if event.membership == Membership.INVITE: - self._simple_insert_txn( - txn, - table="local_invites", - values={ - "event_id": event.event_id, - "invitee": event.state_key, - "inviter": event.sender, - "room_id": event.room_id, - "stream_id": event.internal_metadata.stream_ordering, - } - ) - else: - sql = ( - "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - txn.execute(sql, ( - event.internal_metadata.stream_ordering, - event.event_id, - event.room_id, - event.state_key, - )) - - @defer.inlineCallbacks - def locally_reject_invite(self, user_id, room_id): - sql = ( - "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" - " room_id = ? AND invitee = ? AND locally_rejected is NULL" - " AND replaced_by is NULL" - ) - - def f(txn, stream_ordering): - txn.execute(sql, ( - stream_ordering, - True, - room_id, - user_id, - )) - - with self._stream_id_gen.get_next() as stream_ordering: - yield self.runInteraction("locally_reject_invite", f, stream_ordering) - +class RoomMemberWorkerStore(EventsWorkerStore): @cachedInlineCallbacks(max_entries=100000, iterable=True, cache_context=True) def get_hosts_in_room(self, room_id, cache_context): """Returns the set of all hosts currently in the room @@ -295,89 +205,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(user_who_share_room) - def forget(self, user_id, room_id): - """Indicate that user_id wishes to discard history for room_id.""" - def f(txn): - sql = ( - "UPDATE" - " room_memberships" - " SET" - " forgotten = 1" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - ) - txn.execute(sql, (user_id, room_id)) - - txn.call_after(self.was_forgotten_at.invalidate_all) - txn.call_after(self.did_forget.invalidate, (user_id, room_id)) - self._invalidate_cache_and_stream( - txn, self.who_forgot_in_room, (room_id,) - ) - return self.runInteraction("forget_membership", f) - - @cachedInlineCallbacks(num_args=2) - def did_forget(self, user_id, room_id): - """Returns whether user_id has elected to discard history for room_id. - - Returns False if they have since re-joined.""" - def f(txn): - sql = ( - "SELECT" - " COUNT(*)" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" 
- " AND" - " forgotten = 0" - ) - txn.execute(sql, (user_id, room_id)) - rows = txn.fetchall() - return rows[0][0] - count = yield self.runInteraction("did_forget_membership", f) - defer.returnValue(count == 0) - - @cachedInlineCallbacks(num_args=3) - def was_forgotten_at(self, user_id, room_id, event_id): - """Returns whether user_id has elected to discard history for room_id at - event_id. - - event_id must be a membership event.""" - def f(txn): - sql = ( - "SELECT" - " forgotten" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " event_id = ?" - ) - txn.execute(sql, (user_id, room_id, event_id)) - rows = txn.fetchall() - return rows[0][0] - forgot = yield self.runInteraction("did_forget_membership_at", f) - defer.returnValue(forgot == 1) - - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - def get_joined_users_from_context(self, event, context): state_group = context.state_group if not state_group: @@ -600,6 +427,185 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(joined_hosts) + @cached(max_entries=10000, iterable=True) + def _get_joined_hosts_cache(self, room_id): + return _JoinedHostsCache(self, room_id) + + +class RoomMemberStore(RoomMemberWorkerStore): + def __init__(self, db_conn, hs): + super(RoomMemberStore, self).__init__(db_conn, hs) + self.register_background_update_handler( + _MEMBERSHIP_PROFILE_UPDATE_NAME, self._background_add_membership_profile + ) + + def _store_room_members_txn(self, txn, events, backfilled): + """Store a room member in the database. + """ + self._simple_insert_many_txn( + txn, + table="room_memberships", + values=[ + { + "event_id": event.event_id, + "user_id": event.state_key, + "sender": event.user_id, + "room_id": event.room_id, + "membership": event.membership, + "display_name": event.content.get("displayname", None), + "avatar_url": event.content.get("avatar_url", None), + } + for event in events + ] + ) + + for event in events: + txn.call_after( + self._membership_stream_cache.entity_has_changed, + event.state_key, event.internal_metadata.stream_ordering + ) + txn.call_after( + self.get_invited_rooms_for_user.invalidate, (event.state_key,) + ) + + # We update the local_invites table only if the event is "current", + # i.e., its something that has just happened. + # The only current event that can also be an outlier is if its an + # invite that has come in across federation. + is_new_state = not backfilled and ( + not event.internal_metadata.is_outlier() + or event.internal_metadata.is_invite_from_remote() + ) + is_mine = self.hs.is_mine_id(event.state_key) + if is_new_state and is_mine: + if event.membership == Membership.INVITE: + self._simple_insert_txn( + txn, + table="local_invites", + values={ + "event_id": event.event_id, + "invitee": event.state_key, + "inviter": event.sender, + "room_id": event.room_id, + "stream_id": event.internal_metadata.stream_ordering, + } + ) + else: + sql = ( + "UPDATE local_invites SET stream_id = ?, replaced_by = ? WHERE" + " room_id = ? AND invitee = ? 
AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + txn.execute(sql, ( + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.state_key, + )) + + @defer.inlineCallbacks + def locally_reject_invite(self, user_id, room_id): + sql = ( + "UPDATE local_invites SET stream_id = ?, locally_rejected = ? WHERE" + " room_id = ? AND invitee = ? AND locally_rejected is NULL" + " AND replaced_by is NULL" + ) + + def f(txn, stream_ordering): + txn.execute(sql, ( + stream_ordering, + True, + room_id, + user_id, + )) + + with self._stream_id_gen.get_next() as stream_ordering: + yield self.runInteraction("locally_reject_invite", f, stream_ordering) + + def forget(self, user_id, room_id): + """Indicate that user_id wishes to discard history for room_id.""" + def f(txn): + sql = ( + "UPDATE" + " room_memberships" + " SET" + " forgotten = 1" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + ) + txn.execute(sql, (user_id, room_id)) + + txn.call_after(self.was_forgotten_at.invalidate_all) + txn.call_after(self.did_forget.invalidate, (user_id, room_id)) + self._invalidate_cache_and_stream( + txn, self.who_forgot_in_room, (room_id,) + ) + return self.runInteraction("forget_membership", f) + + @cachedInlineCallbacks(num_args=2) + def did_forget(self, user_id, room_id): + """Returns whether user_id has elected to discard history for room_id. + + Returns False if they have since re-joined.""" + def f(txn): + sql = ( + "SELECT" + " COUNT(*)" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " forgotten = 0" + ) + txn.execute(sql, (user_id, room_id)) + rows = txn.fetchall() + return rows[0][0] + count = yield self.runInteraction("did_forget_membership", f) + defer.returnValue(count == 0) + + @cachedInlineCallbacks(num_args=3) + def was_forgotten_at(self, user_id, room_id, event_id): + """Returns whether user_id has elected to discard history for room_id at + event_id. + + event_id must be a membership event.""" + def f(txn): + sql = ( + "SELECT" + " forgotten" + " FROM" + " room_memberships" + " WHERE" + " user_id = ?" + " AND" + " room_id = ?" + " AND" + " event_id = ?" 
+ ) + txn.execute(sql, (user_id, room_id, event_id)) + rows = txn.fetchall() + return rows[0][0] + forgot = yield self.runInteraction("did_forget_membership_at", f) + defer.returnValue(forgot == 1) + + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( @@ -675,10 +681,6 @@ class RoomMemberStore(SQLBaseStore): defer.returnValue(result) - @cached(max_entries=10000, iterable=True) - def _get_joined_hosts_cache(self, room_id): - return _JoinedHostsCache(self, room_id) - class _JoinedHostsCache(object): """Cache for joined hosts in a room that is optimised to handle updates From faeb369f158a3ca6ba8f48ca1d551b2b53f4c53a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 21 Feb 2018 15:19:54 +0000 Subject: [PATCH 230/348] Fix missing invalidations for receipt storage --- synapse/replication/slave/storage/receipts.py | 2 ++ synapse/storage/receipts.py | 28 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/synapse/replication/slave/storage/receipts.py b/synapse/replication/slave/storage/receipts.py index f0e29e9836..1647072f65 100644 --- a/synapse/replication/slave/storage/receipts.py +++ b/synapse/replication/slave/storage/receipts.py @@ -53,6 +53,8 @@ class SlavedReceiptsStore(ReceiptsWorkerStore, BaseSlavedStore): self.get_last_receipt_event_id_for_user.invalidate( (user_id, room_id, receipt_type) ) + self._invalidate_get_users_with_receipts_in_room(room_id, receipt_type, user_id) + self.get_receipts_for_room.invalidate((room_id, receipt_type)) def process_replication_rows(self, stream_name, token, rows): if stream_name == "receipts": diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 40530632c6..eac8694e0f 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -292,20 +292,6 @@ class ReceiptsWorkerStore(SQLBaseStore): "get_all_updated_receipts", get_all_updated_receipts_txn ) - -class ReceiptsStore(ReceiptsWorkerStore): - def __init__(self, db_conn, hs): - # We instantiate this first as the ReceiptsWorkerStore constructor - # needs to be able to call get_max_receipt_stream_id - self._receipts_id_gen = StreamIdGenerator( - db_conn, "receipts_linearized", "stream_id" - ) - - super(ReceiptsStore, self).__init__(db_conn, hs) - - def get_max_receipt_stream_id(self): - return self._receipts_id_gen.get_current_token() - def _invalidate_get_users_with_receipts_in_room(self, room_id, receipt_type, user_id): if receipt_type != "m.read": @@ -326,6 +312,20 @@ class ReceiptsStore(ReceiptsWorkerStore): self.get_users_with_read_receipts_in_room.invalidate((room_id,)) + +class ReceiptsStore(ReceiptsWorkerStore): + def __init__(self, db_conn, hs): + # We instantiate this first as the ReceiptsWorkerStore constructor + # needs to be able to call get_max_receipt_stream_id + self._receipts_id_gen = StreamIdGenerator( + db_conn, "receipts_linearized", "stream_id" + ) + + super(ReceiptsStore, self).__init__(db_conn, hs) + + def get_max_receipt_stream_id(self): + return self._receipts_id_gen.get_current_token() + def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): txn.call_after( From bb73f55fc6559658080d6cdd5672506fda7843ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: 
Fri, 23 Feb 2018 10:31:16 +0000 Subject: [PATCH 231/348] Use absolute imports --- synapse/replication/slave/storage/account_data.py | 4 ++-- synapse/storage/account_data.py | 4 ++-- synapse/storage/tags.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c95261aa5..f76dc5a56b 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import BaseSlavedStore -from ._slaved_id_tracker import SlavedIdTracker +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage.account_data import AccountDataWorkerStore from synapse.storage.tags import TagsWorkerStore diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 66fed4bdcf..0d6acbd9a1 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -15,8 +15,8 @@ from twisted.internet import defer -from ._base import SQLBaseStore -from .util.id_generators import StreamIdGenerator +from synapse.storage._base import SQLBaseStore +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 484d66991a..a8d0bf80c4 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .account_data import AccountDataWorkerStore +from synapse.storage.account_data import AccountDataWorkerStore from synapse.util.caches.descriptors import cached from twisted.internet import defer From 26d37f7a63cf33ac8e3f8346a6e982c7b2f6cbb1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:33:55 +0000 Subject: [PATCH 232/348] Update copyright --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 1 + synapse/storage/tags.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index f76dc5a56b..6c8d2954d7 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 0d6acbd9a1..466194e96f 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index a8d0bf80c4..fc46bf7bb3 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 7e6cf89dc2a7fc3c159459b34562719ab61713ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:39:19 +0000 Subject: [PATCH 233/348] Update copyright --- synapse/replication/slave/storage/push_rule.py | 1 + synapse/storage/__init__.py | 1 + synapse/storage/push_rule.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 0e3d9a87dc..bb2c40b6e3 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index cd2759858c..26faf985be 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index b35bd7a644..583efb7bdf 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From c2ecfcc3a4374d47db38465510e4c480da353ebb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:41:34 +0000 Subject: [PATCH 234/348] Update copyright --- synapse/replication/slave/storage/pushers.py | 1 + synapse/storage/pusher.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/pushers.py b/synapse/replication/slave/storage/pushers.py index e352c3235d..a7cd5a7291 100644 --- a/synapse/replication/slave/storage/pushers.py +++ b/synapse/replication/slave/storage/pushers.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index b0159c70c0..f4af3e4caa 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From eb9b5eec819a2d594bc237652d831e64517d0c93 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:42:39 +0000 Subject: [PATCH 235/348] Update copyright --- synapse/storage/event_push_actions.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 124583835d..4cabf70ad0 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 73177e0bc2..c636da4b72 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 3bd760628bc0b178f6709b9ea3439a44ebcebab2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:18 +0000 Subject: [PATCH 236/348] _event_persist_queue shouldn't be in worker store --- synapse/storage/events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314d..32da81c47b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -200,10 +200,6 @@ def _retry_on_integrity_error(func): class EventsWorkerStore(SQLBaseStore): - def __init__(self, db_conn, hs): - super(EventsWorkerStore, self).__init__(db_conn, hs) - - self._event_persist_queue = _EventPeristenceQueue() @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, @@ -583,6 +579,10 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) + + + self._event_persist_queue = _EventPeristenceQueue() + self._state_resolution_handler = hs.get_state_resolution_handler() def persist_events(self, events_and_contexts, backfilled=False): From 5d0f6658489287dc19ce9e089fc61b06208f3fc4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:49:58 +0000 Subject: [PATCH 237/348] Remove redundant clock --- synapse/storage/events.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 32da81c47b..f6aa3612e1 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -550,7 +550,6 @@ class EventsStore(EventsWorkerStore): def __init__(self, db_conn, hs): super(EventsStore, self).__init__(db_conn, hs) - self._clock = hs.get_clock() self.register_background_update_handler( self.EVENT_ORIGIN_SERVER_TS_NAME, self._background_reindex_origin_server_ts ) @@ -579,8 +578,6 @@ class EventsStore(EventsWorkerStore): psql_only=True, ) - - self._event_persist_queue = _EventPeristenceQueue() self._state_resolution_handler = hs.get_state_resolution_handler() From bf8a36e0805a1626d335f78ddb90ba25220bbfa1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:52:10 +0000 Subject: [PATCH 238/348] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 74a81a0a51..f35cba2899 100644 --- 
a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index f6aa3612e1..84a6f6782a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From eba93b05bfaa1e6cb5bd66621021f6fff750ab97 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:01:21 +0000 Subject: [PATCH 239/348] Split EventsWorkerStore into separate file --- synapse/replication/slave/storage/events.py | 2 +- synapse/storage/events.py | 365 +----------------- synapse/storage/events_worker.py | 395 ++++++++++++++++++++ 3 files changed, 401 insertions(+), 361 deletions(-) create mode 100644 synapse/storage/events_worker.py diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f35cba2899..5edfacc9ec 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -19,7 +19,7 @@ from synapse.api.constants import EventTypes from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationStore from synapse.storage.event_push_actions import EventPushActionsStore -from synapse.storage.events import EventsWorkerStore +from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 84a6f6782a..99d6cca585 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -13,16 +13,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore -from twisted.internet import defer, reactor +from synapse.storage.events_worker import EventsWorkerStore -from synapse.events import FrozenEvent, USE_FROZEN_DICTS -from synapse.events.utils import prune_event +from twisted.internet import defer + +from synapse.events import USE_FROZEN_DICTS from synapse.util.async import ObservableDeferred from synapse.util.logcontext import ( - preserve_fn, PreserveLoggingContext, make_deferred_yieldable + PreserveLoggingContext, make_deferred_yieldable ) from synapse.util.logutils import log_function from synapse.util.metrics import Measure @@ -62,16 +62,6 @@ def encode_json(json_object): return json.dumps(json_object, ensure_ascii=False) -# These values are used in the `enqueus_event` and `_do_fetch` methods to -# control how we batch/bulk fetch events from the database. -# The values are plucked out of thing air to make initial sync run faster -# on jki.re -# TODO: Make these configurable. -EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events -EVENT_QUEUE_ITERATIONS = 3 # No. 
times we block waiting for requests for events -EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events - - class _EventPeristenceQueue(object): """Queues up events so that they can be persisted in bulk with only one concurrent transaction per room. @@ -200,351 +190,6 @@ def _retry_on_integrity_error(func): return f -class EventsWorkerStore(SQLBaseStore): - - @defer.inlineCallbacks - def get_event(self, event_id, check_redacted=True, - get_prev_content=False, allow_rejected=False, - allow_none=False): - """Get an event from the database by event_id. - - Args: - event_id (str): The event_id of the event to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - allow_none (bool): If True, return None if no event found, if - False throw an exception. - - Returns: - Deferred : A FrozenEvent. - """ - events = yield self._get_events( - [event_id], - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - if not events and not allow_none: - raise SynapseError(404, "Could not find event %s" % (event_id,)) - - defer.returnValue(events[0] if events else None) - - @defer.inlineCallbacks - def get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - """Get events from the database - - Args: - event_ids (list): The event_ids of the events to fetch - check_redacted (bool): If True, check if event has been redacted - and redact it. - get_prev_content (bool): If True and event is a state event, - include the previous states content in the unsigned field. - allow_rejected (bool): If True return rejected events. - - Returns: - Deferred : Dict from event_id to event. 
- """ - events = yield self._get_events( - event_ids, - check_redacted=check_redacted, - get_prev_content=get_prev_content, - allow_rejected=allow_rejected, - ) - - defer.returnValue({e.event_id: e for e in events}) - - @defer.inlineCallbacks - def _get_events(self, event_ids, check_redacted=True, - get_prev_content=False, allow_rejected=False): - if not event_ids: - defer.returnValue([]) - - event_id_list = event_ids - event_ids = set(event_ids) - - event_entry_map = self._get_events_from_cache( - event_ids, - allow_rejected=allow_rejected, - ) - - missing_events_ids = [e for e in event_ids if e not in event_entry_map] - - if missing_events_ids: - missing_events = yield self._enqueue_events( - missing_events_ids, - check_redacted=check_redacted, - allow_rejected=allow_rejected, - ) - - event_entry_map.update(missing_events) - - events = [] - for event_id in event_id_list: - entry = event_entry_map.get(event_id, None) - if not entry: - continue - - if allow_rejected or not entry.event.rejected_reason: - if check_redacted and entry.redacted_event: - event = entry.redacted_event - else: - event = entry.event - - events.append(event) - - if get_prev_content: - if "replaces_state" in event.unsigned: - prev = yield self.get_event( - event.unsigned["replaces_state"], - get_prev_content=False, - allow_none=True, - ) - if prev: - event.unsigned = dict(event.unsigned) - event.unsigned["prev_content"] = prev.content - event.unsigned["prev_sender"] = prev.sender - - defer.returnValue(events) - - def _invalidate_get_event_cache(self, event_id): - self._get_event_cache.invalidate((event_id,)) - - def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): - """Fetch events from the caches - - Args: - events (list(str)): list of event_ids to fetch - allow_rejected (bool): Whether to teturn events that were rejected - update_metrics (bool): Whether to update the cache hit ratio metrics - - Returns: - dict of event_id -> _EventCacheEntry for each event_id in cache. If - allow_rejected is `False` then there will still be an entry but it - will be `None` - """ - event_map = {} - - for event_id in events: - ret = self._get_event_cache.get( - (event_id,), None, - update_metrics=update_metrics, - ) - if not ret: - continue - - if allow_rejected or not ret.event.rejected_reason: - event_map[event_id] = ret - else: - event_map[event_id] = None - - return event_map - - def _do_fetch(self, conn): - """Takes a database connection and waits for requests for events from - the _event_fetch_list queue. 
- """ - event_list = [] - i = 0 - while True: - try: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if single_threaded or i > EVENT_QUEUE_ITERATIONS: - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - event_id_lists = zip(*event_list)[0] - event_ids = [ - item for sublist in event_id_lists for item in sublist - ] - - rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids - ) - - row_dict = { - r["event_id"]: r - for r in rows - } - - # We only want to resolve deferreds from the main thread - def fire(lst, res): - for ids, d in lst: - if not d.called: - try: - with PreserveLoggingContext(): - d.callback([ - res[i] - for i in ids - if i in res - ]) - except Exception: - logger.exception("Failed to callback") - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) - except Exception as e: - logger.exception("do_fetch") - - # We only want to resolve deferreds from the main thread - def fire(evs): - for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(e) - - if event_list: - with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) - - @defer.inlineCallbacks - def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): - """Fetches events from the database using the _event_fetch_list. This - allows batch and bulk fetching of events - it allows us to fetch events - without having to create a new transaction for each request for events. - """ - if not events: - defer.returnValue({}) - - events_d = defer.Deferred() - with self._event_fetch_lock: - self._event_fetch_list.append( - (events, events_d) - ) - - self._event_fetch_lock.notify() - - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - should_start = True - else: - should_start = False - - if should_start: - with PreserveLoggingContext(): - self.runWithConnection( - self._do_fetch - ) - - logger.debug("Loading %d events", len(events)) - with PreserveLoggingContext(): - rows = yield events_d - logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) - - if not allow_rejected: - rows[:] = [r for r in rows if not r["rejects"]] - - res = yield make_deferred_yieldable(defer.gatherResults( - [ - preserve_fn(self._get_event_from_row)( - row["internal_metadata"], row["json"], row["redacts"], - rejected_reason=row["rejects"], - ) - for row in rows - ], - consumeErrors=True - )) - - defer.returnValue({ - e.event.event_id: e - for e in res if e - }) - - def _fetch_event_rows(self, txn, events): - rows = [] - N = 200 - for i in range(1 + len(events) / N): - evs = events[i * N:(i + 1) * N] - if not evs: - break - - sql = ( - "SELECT " - " e.event_id as event_id, " - " e.internal_metadata," - " e.json," - " r.redacts as redacts," - " rej.event_id as rejects " - " FROM event_json as e" - " LEFT JOIN rejections as rej USING (event_id)" - " LEFT JOIN redactions as r ON e.event_id = r.redacts" - " WHERE e.event_id IN (%s)" - ) % (",".join(["?"] * len(evs)),) - - txn.execute(sql, evs) - rows.extend(self.cursor_to_dict(txn)) - - return rows - - @defer.inlineCallbacks - def _get_event_from_row(self, internal_metadata, js, redacted, - rejected_reason=None): - with Measure(self._clock, "_get_event_from_row"): - d = json.loads(js) - internal_metadata = 
json.loads(internal_metadata) - - if rejected_reason: - rejected_reason = yield self._simple_select_one_onecol( - table="rejections", - keyvalues={"event_id": rejected_reason}, - retcol="reason", - desc="_get_event_from_row_rejected_reason", - ) - - original_ev = FrozenEvent( - d, - internal_metadata_dict=internal_metadata, - rejected_reason=rejected_reason, - ) - - redacted_event = None - if redacted: - redacted_event = prune_event(original_ev) - - redaction_id = yield self._simple_select_one_onecol( - table="redactions", - keyvalues={"redacts": redacted_event.event_id}, - retcol="event_id", - desc="_get_event_from_row_redactions", - ) - - redacted_event.unsigned["redacted_by"] = redaction_id - # Get the redaction event. - - because = yield self.get_event( - redaction_id, - check_redacted=False, - allow_none=True, - ) - - if because: - # It's fine to do add the event directly, since get_pdu_json - # will serialise this field correctly - redacted_event.unsigned["redacted_because"] = because - - cache_entry = _EventCacheEntry( - event=original_ev, - redacted_event=redacted_event, - ) - - self._get_event_cache.prefill((original_ev.event_id,), cache_entry) - - defer.returnValue(cache_entry) - - class EventsStore(EventsWorkerStore): EVENT_ORIGIN_SERVER_TS_NAME = "event_origin_server_ts" EVENT_FIELDS_SENDER_URL_UPDATE_NAME = "event_fields_sender_url" diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py new file mode 100644 index 0000000000..86c3b48ad4 --- /dev/null +++ b/synapse/storage/events_worker.py @@ -0,0 +1,395 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ._base import SQLBaseStore + +from twisted.internet import defer, reactor + +from synapse.events import FrozenEvent +from synapse.events.utils import prune_event + +from synapse.util.logcontext import ( + preserve_fn, PreserveLoggingContext, make_deferred_yieldable +) +from synapse.util.metrics import Measure +from synapse.api.errors import SynapseError + +from collections import namedtuple + +import logging +import ujson as json + +# these are only included to make the type annotations work +from synapse.events import EventBase # noqa: F401 +from synapse.events.snapshot import EventContext # noqa: F401 + +logger = logging.getLogger(__name__) + + +# These values are used in the `enqueus_event` and `_do_fetch` methods to +# control how we batch/bulk fetch events from the database. +# The values are plucked out of thing air to make initial sync run faster +# on jki.re +# TODO: Make these configurable. +EVENT_QUEUE_THREADS = 3 # Max number of threads that will fetch events +EVENT_QUEUE_ITERATIONS = 3 # No. 
times we block waiting for requests for events +EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events + + +_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) + + +class EventsWorkerStore(SQLBaseStore): + + @defer.inlineCallbacks + def get_event(self, event_id, check_redacted=True, + get_prev_content=False, allow_rejected=False, + allow_none=False): + """Get an event from the database by event_id. + + Args: + event_id (str): The event_id of the event to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + allow_none (bool): If True, return None if no event found, if + False throw an exception. + + Returns: + Deferred : A FrozenEvent. + """ + events = yield self._get_events( + [event_id], + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + if not events and not allow_none: + raise SynapseError(404, "Could not find event %s" % (event_id,)) + + defer.returnValue(events[0] if events else None) + + @defer.inlineCallbacks + def get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + """Get events from the database + + Args: + event_ids (list): The event_ids of the events to fetch + check_redacted (bool): If True, check if event has been redacted + and redact it. + get_prev_content (bool): If True and event is a state event, + include the previous states content in the unsigned field. + allow_rejected (bool): If True return rejected events. + + Returns: + Deferred : Dict from event_id to event. 
+ """ + events = yield self._get_events( + event_ids, + check_redacted=check_redacted, + get_prev_content=get_prev_content, + allow_rejected=allow_rejected, + ) + + defer.returnValue({e.event_id: e for e in events}) + + @defer.inlineCallbacks + def _get_events(self, event_ids, check_redacted=True, + get_prev_content=False, allow_rejected=False): + if not event_ids: + defer.returnValue([]) + + event_id_list = event_ids + event_ids = set(event_ids) + + event_entry_map = self._get_events_from_cache( + event_ids, + allow_rejected=allow_rejected, + ) + + missing_events_ids = [e for e in event_ids if e not in event_entry_map] + + if missing_events_ids: + missing_events = yield self._enqueue_events( + missing_events_ids, + check_redacted=check_redacted, + allow_rejected=allow_rejected, + ) + + event_entry_map.update(missing_events) + + events = [] + for event_id in event_id_list: + entry = event_entry_map.get(event_id, None) + if not entry: + continue + + if allow_rejected or not entry.event.rejected_reason: + if check_redacted and entry.redacted_event: + event = entry.redacted_event + else: + event = entry.event + + events.append(event) + + if get_prev_content: + if "replaces_state" in event.unsigned: + prev = yield self.get_event( + event.unsigned["replaces_state"], + get_prev_content=False, + allow_none=True, + ) + if prev: + event.unsigned = dict(event.unsigned) + event.unsigned["prev_content"] = prev.content + event.unsigned["prev_sender"] = prev.sender + + defer.returnValue(events) + + def _invalidate_get_event_cache(self, event_id): + self._get_event_cache.invalidate((event_id,)) + + def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): + """Fetch events from the caches + + Args: + events (list(str)): list of event_ids to fetch + allow_rejected (bool): Whether to teturn events that were rejected + update_metrics (bool): Whether to update the cache hit ratio metrics + + Returns: + dict of event_id -> _EventCacheEntry for each event_id in cache. If + allow_rejected is `False` then there will still be an entry but it + will be `None` + """ + event_map = {} + + for event_id in events: + ret = self._get_event_cache.get( + (event_id,), None, + update_metrics=update_metrics, + ) + if not ret: + continue + + if allow_rejected or not ret.event.rejected_reason: + event_map[event_id] = ret + else: + event_map[event_id] = None + + return event_map + + def _do_fetch(self, conn): + """Takes a database connection and waits for requests for events from + the _event_fetch_list queue. 
+ """ + event_list = [] + i = 0 + while True: + try: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if single_threaded or i > EVENT_QUEUE_ITERATIONS: + self._event_fetch_ongoing -= 1 + return + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + event_id_lists = zip(*event_list)[0] + event_ids = [ + item for sublist in event_id_lists for item in sublist + ] + + rows = self._new_transaction( + conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + ) + + row_dict = { + r["event_id"]: r + for r in rows + } + + # We only want to resolve deferreds from the main thread + def fire(lst, res): + for ids, d in lst: + if not d.called: + try: + with PreserveLoggingContext(): + d.callback([ + res[i] + for i in ids + if i in res + ]) + except Exception: + logger.exception("Failed to callback") + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list, row_dict) + except Exception as e: + logger.exception("do_fetch") + + # We only want to resolve deferreds from the main thread + def fire(evs): + for _, d in evs: + if not d.called: + with PreserveLoggingContext(): + d.errback(e) + + if event_list: + with PreserveLoggingContext(): + reactor.callFromThread(fire, event_list) + + @defer.inlineCallbacks + def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): + """Fetches events from the database using the _event_fetch_list. This + allows batch and bulk fetching of events - it allows us to fetch events + without having to create a new transaction for each request for events. + """ + if not events: + defer.returnValue({}) + + events_d = defer.Deferred() + with self._event_fetch_lock: + self._event_fetch_list.append( + (events, events_d) + ) + + self._event_fetch_lock.notify() + + if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: + self._event_fetch_ongoing += 1 + should_start = True + else: + should_start = False + + if should_start: + with PreserveLoggingContext(): + self.runWithConnection( + self._do_fetch + ) + + logger.debug("Loading %d events", len(events)) + with PreserveLoggingContext(): + rows = yield events_d + logger.debug("Loaded %d events (%d rows)", len(events), len(rows)) + + if not allow_rejected: + rows[:] = [r for r in rows if not r["rejects"]] + + res = yield make_deferred_yieldable(defer.gatherResults( + [ + preserve_fn(self._get_event_from_row)( + row["internal_metadata"], row["json"], row["redacts"], + rejected_reason=row["rejects"], + ) + for row in rows + ], + consumeErrors=True + )) + + defer.returnValue({ + e.event.event_id: e + for e in res if e + }) + + def _fetch_event_rows(self, txn, events): + rows = [] + N = 200 + for i in range(1 + len(events) / N): + evs = events[i * N:(i + 1) * N] + if not evs: + break + + sql = ( + "SELECT " + " e.event_id as event_id, " + " e.internal_metadata," + " e.json," + " r.redacts as redacts," + " rej.event_id as rejects " + " FROM event_json as e" + " LEFT JOIN rejections as rej USING (event_id)" + " LEFT JOIN redactions as r ON e.event_id = r.redacts" + " WHERE e.event_id IN (%s)" + ) % (",".join(["?"] * len(evs)),) + + txn.execute(sql, evs) + rows.extend(self.cursor_to_dict(txn)) + + return rows + + @defer.inlineCallbacks + def _get_event_from_row(self, internal_metadata, js, redacted, + rejected_reason=None): + with Measure(self._clock, "_get_event_from_row"): + d = json.loads(js) + internal_metadata = 
json.loads(internal_metadata) + + if rejected_reason: + rejected_reason = yield self._simple_select_one_onecol( + table="rejections", + keyvalues={"event_id": rejected_reason}, + retcol="reason", + desc="_get_event_from_row_rejected_reason", + ) + + original_ev = FrozenEvent( + d, + internal_metadata_dict=internal_metadata, + rejected_reason=rejected_reason, + ) + + redacted_event = None + if redacted: + redacted_event = prune_event(original_ev) + + redaction_id = yield self._simple_select_one_onecol( + table="redactions", + keyvalues={"redacts": redacted_event.event_id}, + retcol="event_id", + desc="_get_event_from_row_redactions", + ) + + redacted_event.unsigned["redacted_by"] = redaction_id + # Get the redaction event. + + because = yield self.get_event( + redaction_id, + check_redacted=False, + allow_none=True, + ) + + if because: + # It's fine to do add the event directly, since get_pdu_json + # will serialise this field correctly + redacted_event.unsigned["redacted_because"] = because + + cache_entry = _EventCacheEntry( + event=original_ev, + redacted_event=redacted_event, + ) + + self._get_event_cache.prefill((original_ev.event_id,), cache_entry) + + defer.returnValue(cache_entry) From 70349872c2dcff20a7b174bf0fcfdd5b8e47eec3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 11:14:35 +0000 Subject: [PATCH 240/348] Update copyright --- synapse/replication/slave/storage/events.py | 1 + synapse/storage/events.py | 1 + synapse/storage/roommember.py | 1 + 3 files changed, 3 insertions(+) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index 0dc87aee7f..ef7a42d801 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 681a33314d..3e3229b408 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 6574fe74b4..b9158b9896 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 6ae9a3d2a6cd9db7e07fda270728cf15351a5a0b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 23 Feb 2018 10:45:00 +0000 Subject: [PATCH 241/348] Update copyright --- synapse/replication/slave/storage/appservice.py | 1 + synapse/storage/appservice.py | 1 + 2 files changed, 2 insertions(+) diff --git a/synapse/replication/slave/storage/appservice.py b/synapse/replication/slave/storage/appservice.py index 3b9ded0098..8cae3076f4 100644 --- a/synapse/replication/slave/storage/appservice.py +++ b/synapse/replication/slave/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index f66cc98091..063906f5ae 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 8956f0147aecc27a0590eeeecd130d05b7a55767 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 10:06:51 +0000 Subject: [PATCH 242/348] Add comment --- synapse/storage/appservice.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 063906f5ae..90fb51d43c 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -441,4 +441,8 @@ class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, class ApplicationServiceTransactionStore(ApplicationServiceTransactionWorkerStore): + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. pass From 493e25d5545389264f696be0e07544bf82a0818a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 27 Feb 2018 12:01:36 +0000 Subject: [PATCH 243/348] Move storage functions for push calculations This will allow push actions for an event to be calculated on workers. 
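As a rough, self-contained sketch of the pattern (the class below is
illustrative, not one of Synapse's real stores): anything a worker process
needs to call lives on a shared ``...WorkerStore`` base class, and only the
paths that must run on a single process stay on the master-only subclass.

    class ExamplePushWorkerStore(object):
        """Everything a worker needs to calculate push actions for an event."""

        def __init__(self, db):
            # `db` is a plain dict standing in for the real database layer.
            self.db = db

        def get_push_rules_for_user(self, user_id):
            # Read path: workers need this to evaluate push rules.
            return self.db.get("rules", {}).get(user_id, [])

        def add_push_actions_to_staging(self, event_id, actions):
            # Workers also need this write, so it moves to the worker store.
            self.db.setdefault("staging", {})[event_id] = list(actions)


    class ExamplePushStore(ExamplePushWorkerStore):
        """Master-only subclass: keeps the paths only one process may run."""

        def rotate_staged_actions(self):
            staged = self.db.pop("staging", {})
            self.db.setdefault("persisted", {}).update(staged)

This mirrors why the staging helpers and the bulk rule/receipt getters move
up into the ``*WorkerStore`` classes in the diff below.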
--- synapse/app/pusher.py | 5 - synapse/app/synchrotron.py | 8 +- synapse/storage/event_push_actions.py | 126 +++++++++++++------------- synapse/storage/push_rule.py | 14 ++- synapse/storage/pusher.py | 22 +++-- synapse/storage/roommember.py | 24 ++--- 6 files changed, 101 insertions(+), 98 deletions(-) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 32ccea3f13..98a4a7c62c 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -32,7 +32,6 @@ from synapse.replication.tcp.client import ReplicationClientHandler from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.engines import create_engine -from synapse.storage.roommember import RoomMemberStore from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, preserve_fn from synapse.util.manhole import manhole @@ -75,10 +74,6 @@ class PusherSlaveStore( DataStore.get_profile_displayname.__func__ ) - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - class PusherServer(HomeServer): def setup(self): diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index f87531f1b6..abe91dcfbd 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -62,8 +62,6 @@ logger = logging.getLogger("synapse.app.synchrotron") class SynchrotronSlavedStore( - SlavedPushRuleStore, - SlavedEventStore, SlavedReceiptsStore, SlavedAccountDataStore, SlavedApplicationServiceStore, @@ -73,14 +71,12 @@ class SynchrotronSlavedStore( SlavedGroupServerStore, SlavedDeviceInboxStore, SlavedDeviceStore, + SlavedPushRuleStore, + SlavedEventStore, SlavedClientIpStore, RoomStore, BaseSlavedStore, ): - who_forgot_in_room = ( - RoomMemberStore.__dict__["who_forgot_in_room"] - ) - did_forget = ( RoomMemberStore.__dict__["did_forget"] ) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index fe6887414e..6454045c2d 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -380,6 +380,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): # Now return the first `limit` defer.returnValue(notifs[:limit]) + def add_push_actions_to_staging(self, event_id, user_id_actions): + """Add the push actions for the event to the push action staging area. + + Args: + event_id (str) + user_id_actions (dict[str, list[dict|str])]): A dictionary mapping + user_id to list of push actions, where an action can either be + a string or dict. + + Returns: + Deferred + """ + + if not user_id_actions: + return + + # This is a helper function for generating the necessary tuple that + # can be used to inert into the `event_push_actions_staging` table. + def _gen_entry(user_id, actions): + is_highlight = 1 if _action_has_highlight(actions) else 0 + return ( + event_id, # event_id column + user_id, # user_id column + _serialize_action(actions, is_highlight), # actions column + 1, # notif column + is_highlight, # highlight column + ) + + def _add_push_actions_to_staging_txn(txn): + # We don't use _simple_insert_many here to avoid the overhead + # of generating lists of dicts. + + sql = """ + INSERT INTO event_push_actions_staging + (event_id, user_id, actions, notif, highlight) + VALUES (?, ?, ?, ?, ?) 
+ """ + + txn.executemany(sql, ( + _gen_entry(user_id, actions) + for user_id, actions in user_id_actions.iteritems() + )) + + return self.runInteraction( + "add_push_actions_to_staging", _add_push_actions_to_staging_txn + ) + + def remove_push_actions_from_staging(self, event_id): + """Called if we failed to persist the event to ensure that stale push + actions don't build up in the DB + + Args: + event_id (str) + """ + + return self._simple_delete( + table="event_push_actions_staging", + keyvalues={ + "event_id": event_id, + }, + desc="remove_push_actions_from_staging", + ) + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -775,69 +838,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): (rotate_to_stream_ordering,) ) - def add_push_actions_to_staging(self, event_id, user_id_actions): - """Add the push actions for the event to the push action staging area. - - Args: - event_id (str) - user_id_actions (dict[str, list[dict|str])]): A dictionary mapping - user_id to list of push actions, where an action can either be - a string or dict. - - Returns: - Deferred - """ - - if not user_id_actions: - return - - # This is a helper function for generating the necessary tuple that - # can be used to inert into the `event_push_actions_staging` table. - def _gen_entry(user_id, actions): - is_highlight = 1 if _action_has_highlight(actions) else 0 - return ( - event_id, # event_id column - user_id, # user_id column - _serialize_action(actions, is_highlight), # actions column - 1, # notif column - is_highlight, # highlight column - ) - - def _add_push_actions_to_staging_txn(txn): - # We don't use _simple_insert_many here to avoid the overhead - # of generating lists of dicts. - - sql = """ - INSERT INTO event_push_actions_staging - (event_id, user_id, actions, notif, highlight) - VALUES (?, ?, ?, ?, ?) - """ - - txn.executemany(sql, ( - _gen_entry(user_id, actions) - for user_id, actions in user_id_actions.iteritems() - )) - - return self.runInteraction( - "add_push_actions_to_staging", _add_push_actions_to_staging_txn - ) - - def remove_push_actions_from_staging(self, event_id): - """Called if we failed to persist the event to ensure that stale push - actions don't build up in the DB - - Args: - event_id (str) - """ - - return self._simple_delete( - table="event_push_actions_staging", - keyvalues={ - "event_id": event_id, - }, - desc="remove_push_actions_from_staging", - ) - def _action_has_highlight(actions): for action in actions: diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 583efb7bdf..04a0b59a39 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -15,6 +15,10 @@ # limitations under the License. 
from ._base import SQLBaseStore +from synapse.storage.appservice import ApplicationServiceWorkerStore +from synapse.storage.pusher import PusherWorkerStore +from synapse.storage.receipts import ReceiptsWorkerStore +from synapse.storage.roommember import RoomMemberWorkerStore from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.push.baserules import list_with_base_rules @@ -51,7 +55,11 @@ def _load_rules(rawrules, enabled_map): return rules -class PushRulesWorkerStore(SQLBaseStore): +class PushRulesWorkerStore(ApplicationServiceWorkerStore, + ReceiptsWorkerStore, + PusherWorkerStore, + RoomMemberWorkerStore, + SQLBaseStore): """This is an abstract base class where subclasses must implement `get_max_push_rules_stream_id` which can be called in the initializer. """ @@ -140,8 +148,6 @@ class PushRulesWorkerStore(SQLBaseStore): "have_push_rules_changed", have_push_rules_changed_txn ) - -class PushRuleStore(PushRulesWorkerStore): @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids", num_args=1, inlineCallbacks=True) def bulk_get_push_rules(self, user_ids): @@ -281,6 +287,8 @@ class PushRuleStore(PushRulesWorkerStore): results.setdefault(row['user_name'], {})[row['rule_id']] = enabled defer.returnValue(results) + +class PushRuleStore(PushRulesWorkerStore): @defer.inlineCallbacks def add_push_rule( self, user_id, rule_id, priority_class, conditions, actions, diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index f4af3e4caa..307660b99a 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -175,11 +175,6 @@ class PusherWorkerStore(SQLBaseStore): "get_all_updated_pushers_rows", get_all_updated_pushers_rows_txn ) - -class PusherStore(PusherWorkerStore): - def get_pushers_stream_token(self): - return self._pushers_id_gen.get_current_token() - @cachedInlineCallbacks(num_args=1, max_entries=15000) def get_if_user_has_pusher(self, user_id): # This only exists for the cachedList decorator @@ -201,6 +196,11 @@ class PusherStore(PusherWorkerStore): defer.returnValue(result) + +class PusherStore(PusherWorkerStore): + def get_pushers_stream_token(self): + return self._pushers_id_gen.get_current_token() + @defer.inlineCallbacks def add_pusher(self, user_id, access_token, kind, app_id, app_display_name, device_display_name, @@ -233,14 +233,18 @@ class PusherStore(PusherWorkerStore): ) if newly_inserted: - # get_if_user_has_pusher only cares if the user has - # at least *one* pusher. 
- self.get_if_user_has_pusher.invalidate(user_id,) + self.runInteraction( + "add_pusher", + self._invalidate_cache_and_stream, + self.get_if_user_has_pusher, (user_id,) + ) @defer.inlineCallbacks def delete_pusher_by_app_id_pushkey_user_id(self, app_id, pushkey, user_id): def delete_pusher_txn(txn, stream_id): - txn.call_after(self.get_if_user_has_pusher.invalidate, (user_id,)) + self._invalidate_cache_and_stream( + txn, self.get_if_user_has_pusher, (user_id,) + ) self._simple_delete_one_txn( txn, diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index b9158b9896..d79877dac7 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -432,6 +432,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) + @cached() + def who_forgot_in_room(self, room_id): + return self._simple_select_list( + table="room_memberships", + retcols=("user_id", "event_id"), + keyvalues={ + "room_id": room_id, + "forgotten": 1, + }, + desc="who_forgot" + ) + class RoomMemberStore(RoomMemberWorkerStore): def __init__(self, db_conn, hs): @@ -595,18 +607,6 @@ class RoomMemberStore(RoomMemberWorkerStore): forgot = yield self.runInteraction("did_forget_membership_at", f) defer.returnValue(forgot == 1) - @cached() - def who_forgot_in_room(self, room_id): - return self._simple_select_list( - table="room_memberships", - retcols=("user_id", "event_id"), - keyvalues={ - "room_id": room_id, - "forgotten": 1, - }, - desc="who_forgot" - ) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( From 28e973ac119e0b4ec5b9e45772a572a94d0e6643 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Feb 2018 16:30:10 +0000 Subject: [PATCH 244/348] Calculate push actions on worker --- synapse/app/event_creator.py | 8 +++ synapse/handlers/message.py | 84 ++++++++++++++++++-------- synapse/replication/http/send_event.py | 2 +- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index b2ce399258..fc0b9e8c04 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -27,10 +27,14 @@ from synapse.http.server import JsonResource from synapse.http.site import SynapseSite from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.pushers import SlavedPusherStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.tcp.client import ReplicationClientHandler @@ -48,6 +52,10 @@ logger = logging.getLogger("synapse.app.event_creator") class EventCreatorSlavedStore( + SlavedAccountDataStore, + SlavedPusherStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedDeviceStore, 
SlavedClientIpStore, SlavedApplicationServiceStore, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d99d8049b3..4c186965a7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -553,21 +553,67 @@ class EventCreationHandler(object): event, context, ratelimit=True, - extra_users=[] + extra_users=[], ): - # We now need to go and hit out to wherever we need to hit out to. + """Processes a new event. This includes checking auth, persisting it, + notifying users, sending to remote servers, etc. - # If we're a worker we need to hit out to the master. - if self.config.worker_app: - yield send_event_to_master( - self.http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - event=event, - context=context, + If called from a worker will hit out to the master process for final + processing. + + Args: + requester (Requester) + event (FrozenEvent) + context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event + """ + + yield self.action_generator.handle_push_actions_for_event( + event, context + ) + + try: + # We now need to go and hit out to wherever we need to hit out to. + + # If we're a worker we need to hit out to the master. + if self.config.worker_app: + yield send_event_to_master( + self.http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + event=event, + context=context, + ) + return + + yield self.persist_and_notify_client_event( + requester, + event, + context, + ratelimit=ratelimit, + extra_users=extra_users, ) - return + except: # noqa: E722, as we reraise the exception this is fine. + # Ensure that we actually remove the entries in the push actions + # staging area, if we calculated them. + preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) + raise + + @defer.inlineCallbacks + def persist_and_notify_client_event( + self, + requester, + event, + context, + ratelimit=True, + extra_users=[], + ): + """Called when we have fully built and authed the event. This should + only be run on master. + """ + assert not self.config.worker_app if ratelimit: yield self.base_handler.ratelimit(requester) @@ -679,20 +725,10 @@ class EventCreationHandler(object): "Changing the room create event is forbidden", ) - yield self.action_generator.handle_push_actions_for_event( - event, context + (event_stream_id, max_stream_id) = yield self.store.persist_event( + event, context=context ) - try: - (event_stream_id, max_stream_id) = yield self.store.persist_event( - event, context=context - ) - except: # noqa: E722, as we reraise the exception this is fine. - # Ensure that we actually remove the entries in the push actions - # staging area - preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) - raise - # this intentionally does not yield: we don't care about the result # and don't need to wait for it. 
preserve_fn(self.pusher_pool.on_new_notifications)( diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 468f4b68f4..3a99a88bc5 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -106,7 +106,7 @@ class ReplicationSendEventRestServlet(RestServlet): event.event_id, event.room_id, ) - yield self.event_creation_handler.handle_new_client_event( + yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, ) From f756f961eab7ae6e53052ee419413c74d171d144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:05:27 +0000 Subject: [PATCH 245/348] Fixup comments --- synapse/handlers/message.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4c186965a7..c4151d73eb 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -574,8 +574,6 @@ class EventCreationHandler(object): ) try: - # We now need to go and hit out to wherever we need to hit out to. - # If we're a worker we need to hit out to the master. if self.config.worker_app: yield send_event_to_master( @@ -610,8 +608,10 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built and authed the event. This should - only be run on master. + """Called when we have fully built the event, and have already + calculated the push actions for the event. + + This should only be run on master. """ assert not self.config.worker_app From 6b8604239f4c9463023e59664e7810ba58b8f428 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:08:28 +0000 Subject: [PATCH 246/348] Correctly send ratelimit and extra_users params --- synapse/handlers/message.py | 2 ++ synapse/replication/http/send_event.py | 14 +++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c4151d73eb..5f88f84d38 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -583,6 +583,8 @@ class EventCreationHandler(object): requester=requester, event=event, context=context, + ratelimit=ratelimit, + extra_users=extra_users, ) return diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 3a99a88bc5..439bfbb4f6 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -29,7 +29,8 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def send_event_to_master(client, host, port, requester, event, context): +def send_event_to_master(client, host, port, requester, event, context, + ratelimit, extra_users): """Send event to be handled on the master Args: @@ -39,6 +40,8 @@ def send_event_to_master(client, host, port, requester, event, context): requester (Requester) event (FrozenEvent) context (EventContext) + ratelimit (bool) + extra_users (list(str)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) @@ -48,6 +51,8 @@ def send_event_to_master(client, host, port, requester, event, context): "rejected_reason": event.rejected_reason, "context": context.serialize(event), "requester": requester.serialize(), + "ratelimit": ratelimit, + "extra_users": extra_users, } try: @@ -74,6 +79,8 @@ class ReplicationSendEventRestServlet(RestServlet): "rejected_reason": .., // The event.rejected_reason field "context": { .. serialized event context .. 
}, "requester": { .. serialized requester .. }, + "ratelimit": true, + "extra_users": [], } """ PATTERNS = [re.compile("^/_synapse/replication/send_event$")] @@ -98,6 +105,9 @@ class ReplicationSendEventRestServlet(RestServlet): requester = Requester.deserialize(self.store, content["requester"]) context = yield EventContext.deserialize(self.store, content["context"]) + ratelimit = content["ratelimit"] + extra_users = content["extra_users"] + if requester.user: request.authenticated_entity = requester.user.to_string() @@ -108,6 +118,8 @@ class ReplicationSendEventRestServlet(RestServlet): yield self.event_creation_handler.persist_and_notify_client_event( requester, event, context, + ratelimit=ratelimit, + extra_users=extra_users, ) defer.returnValue((200, {})) From f381d6381344eb442f46ae27f29e039175721ff5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:18:33 +0000 Subject: [PATCH 247/348] Check event auth on the worker --- synapse/handlers/message.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5f88f84d38..7d28c2745c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -569,6 +569,20 @@ class EventCreationHandler(object): extra_users (list(str)): Any extra users to notify about event """ + try: + yield self.auth.check_from_context(event, context) + except AuthError as err: + logger.warn("Denying new event %r because %s", event, err) + raise err + + # Ensure that we can round trip before trying to persist in db + try: + dump = ujson.dumps(unfreeze(event.content)) + ujson.loads(dump) + except Exception: + logger.exception("Failed to encode content: %r", event.content) + raise + yield self.action_generator.handle_push_actions_for_event( event, context ) @@ -610,8 +624,8 @@ class EventCreationHandler(object): ratelimit=True, extra_users=[], ): - """Called when we have fully built the event, and have already - calculated the push actions for the event. + """Called when we have fully built the event, have already + calculated the push actions for the event, and checked auth. This should only be run on master. """ @@ -620,20 +634,6 @@ class EventCreationHandler(object): if ratelimit: yield self.base_handler.ratelimit(requester) - try: - yield self.auth.check_from_context(event, context) - except AuthError as err: - logger.warn("Denying new event %r because %s", event, err) - raise err - - # Ensure that we can round trip before trying to persist in db - try: - dump = ujson.dumps(unfreeze(event.content)) - ujson.loads(dump) - except Exception: - logger.exception("Failed to encode content: %r", event.content) - raise - yield self.base_handler.maybe_kick_guest_users(event, context) if event.type == EventTypes.CanonicalAlias: From 8ded8ba2c755f254bd98e15db7bc865eed997f07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:20:34 +0000 Subject: [PATCH 248/348] Make repl send_event idempotent and retry on timeouts If we treated timeouts as failures on the worker we would attempt to clean up e.g. push actions while the master might still process the event. 
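As a standalone illustration of that retry loop (using the ``requests``
library for brevity; the URL and payload here are placeholders, not the real
replication endpoint):

    import time

    import requests

    def put_with_retry(url, payload, retry_interval=1.0):
        # Keep replaying the same idempotent PUT while the server returns a
        # gateway timeout, so the caller only cleans up staged push actions
        # once it knows the request definitively succeeded or failed.
        while True:
            resp = requests.put(url, json=payload)
            if resp.status_code != 504:
                resp.raise_for_status()  # a real failure propagates to the caller
                return resp.json()
            # 504: the server may still be processing the event; retry.
            time.sleep(retry_interval)

Replaying the request after a timeout is safe because the master keys the
handler (and its response cache) on the event_id in the URL.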
--- synapse/replication/http/send_event.py | 44 ++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 439bfbb4f6..73cd3d91d9 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -15,10 +15,15 @@ from twisted.internet import defer -from synapse.api.errors import SynapseError, MatrixCodeMessageException +from synapse.api.errors import ( + SynapseError, MatrixCodeMessageException, CodeMessageException, +) from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.util.async import sleep +from synapse.util.caches.response_cache import ResponseCache +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure from synapse.types import Requester @@ -43,7 +48,9 @@ def send_event_to_master(client, host, port, requester, event, context, ratelimit (bool) extra_users (list(str)): Any extra users to notify about event """ - uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,) + uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( + host, port, event.event_id, + ) payload = { "event": event.get_pdu_json(), @@ -56,7 +63,20 @@ def send_event_to_master(client, host, port, requester, event, context, } try: - result = yield client.post_json_get_json(uri, payload) + # We keep retrying the same request for timeouts. This is so that we + # have a good idea that the request has either succeeded or failed on + # the master, and so whether we should clean up or not. + while True: + try: + result = yield client.put_json(uri, payload) + break + except CodeMessageException as e: + if e.code != 504: + raise + + # If we timed out we probably don't need to worry about backing + # off too much, but lets just wait a little anyway. + yield sleep(1) except MatrixCodeMessageException as e: # We convert to SynapseError as we know that it was a SynapseError # on the master process that we should send to the client. (And @@ -71,7 +91,7 @@ class ReplicationSendEventRestServlet(RestServlet): The API looks like: - POST /_synapse/replication/send_event + POST /_synapse/replication/send_event/:event_id { "event": { .. serialized event .. 
}, @@ -83,7 +103,7 @@ class ReplicationSendEventRestServlet(RestServlet): "extra_users": [], } """ - PATTERNS = [re.compile("^/_synapse/replication/send_event$")] + PATTERNS = [re.compile("^/_synapse/replication/send_event/(?P[^/]+)$")] def __init__(self, hs): super(ReplicationSendEventRestServlet, self).__init__() @@ -92,8 +112,20 @@ class ReplicationSendEventRestServlet(RestServlet): self.store = hs.get_datastore() self.clock = hs.get_clock() + # The responses are tiny, so we may as well cache them for a while + self.response_cache = ResponseCache(hs, timeout_ms=30 * 60 * 1000) + + def on_PUT(self, request, event_id): + result = self.response_cache.get(event_id) + if not result: + result = self.response_cache.set( + event_id, + preserve_fn(self._handle_request)(request) + ) + return make_deferred_yieldable(result) + @defer.inlineCallbacks - def on_POST(self, request): + def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): content = parse_json_object_from_request(request) From 89f90d808ad923374a204377b0bc8ed15a9385a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:16 +0000 Subject: [PATCH 249/348] Add some logging --- synapse/replication/http/send_event.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 73cd3d91d9..f490622b63 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,6 +72,7 @@ def send_event_to_master(client, host, port, requester, event, context, break except CodeMessageException as e: if e.code != 504: + logger.warn("send_event request timed out") raise # If we timed out we probably don't need to worry about backing @@ -118,6 +119,7 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: + logger.warn("Returning cached response") result = self.response_cache.set( event_id, preserve_fn(self._handle_request)(request) From 157298f9862bd06e9268aed9cdffbc1c912fd74d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:59:45 +0000 Subject: [PATCH 250/348] Don't do preserve_fn for every request --- synapse/replication/http/send_event.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index f490622b63..665a56d2e6 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -122,10 +122,11 @@ class ReplicationSendEventRestServlet(RestServlet): logger.warn("Returning cached response") result = self.response_cache.set( event_id, - preserve_fn(self._handle_request)(request) + self._handle_request(request) ) return make_deferred_yieldable(result) + @preserve_fn @defer.inlineCallbacks def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): From 126b9bf96f2c989e7402b5e0177ee39a2a20940e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 12:05:33 +0000 Subject: [PATCH 251/348] Log in the correct places --- synapse/replication/http/send_event.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 665a56d2e6..70f2fe456a 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -72,9 +72,10 @@ def send_event_to_master(client, host, port, requester, event, 
context, break except CodeMessageException as e: if e.code != 504: - logger.warn("send_event request timed out") raise + logger.warn("send_event request timed out") + # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. yield sleep(1) @@ -119,11 +120,12 @@ class ReplicationSendEventRestServlet(RestServlet): def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) if not result: - logger.warn("Returning cached response") result = self.response_cache.set( event_id, self._handle_request(request) ) + else: + logger.warn("Returning cached response") return make_deferred_yieldable(result) @preserve_fn From 3a75de923b9183c073bbddae1e08fae546a11f7a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 12:19:09 +0000 Subject: [PATCH 252/348] Rewrite make_deferred_yieldable avoiding inlineCallbacks ... because (a) it's actually simpler (b) it might be marginally more performant? --- synapse/util/logcontext.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 94fa7cac98..a8dea15c1b 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -299,10 +299,6 @@ def preserve_fn(f): Useful for wrapping functions that return a deferred which you don't yield on. """ - def reset_context(result): - LoggingContext.set_current_context(LoggingContext.sentinel) - return result - def g(*args, **kwargs): current = LoggingContext.current_context() res = f(*args, **kwargs) @@ -323,12 +319,11 @@ def preserve_fn(f): # which is supposed to have a single entry and exit point. But # by spawning off another deferred, we are effectively # adding a new exit point.) - res.addBoth(reset_context) + res.addBoth(_set_context_cb, LoggingContext.sentinel) return res return g -@defer.inlineCallbacks def make_deferred_yieldable(deferred): """Given a deferred, make it follow the Synapse logcontext rules: @@ -342,9 +337,16 @@ def make_deferred_yieldable(deferred): (This is more-or-less the opposite operation to preserve_fn.) """ - with PreserveLoggingContext(): - r = yield deferred - defer.returnValue(r) + if isinstance(deferred, defer.Deferred) and not deferred.called: + prev_context = LoggingContext.set_current_context(LoggingContext.sentinel) + deferred.addBoth(_set_context_cb, prev_context) + return deferred + + +def _set_context_cb(result, context): + """A callback function which just sets the logging context""" + LoggingContext.set_current_context(context) + return result # modules to ignore in `logcontext_tracer` From 0c8ba5dd1ce3e5cec201165c50f69aaa5c68c45d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:39:45 +0000 Subject: [PATCH 253/348] Split up RoomStore --- synapse/replication/slave/storage/room.py | 21 +- synapse/storage/room.py | 239 +++++++++++----------- 2 files changed, 125 insertions(+), 135 deletions(-) diff --git a/synapse/replication/slave/storage/room.py b/synapse/replication/slave/storage/room.py index f510384033..5ae1670157 100644 --- a/synapse/replication/slave/storage/room.py +++ b/synapse/replication/slave/storage/room.py @@ -14,32 +14,19 @@ # limitations under the License. 
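The receiving half of that scheme (the on_PUT changes shown above) caches the outcome keyed on the event_id taken from the URL, so a resent request replays the original answer instead of handling the event twice. The sketch below shows the idea with a plain dict of finished results; the real servlet caches the in-flight deferred in a ResponseCache with an expiry, which this deliberately glosses over.

    class IdempotentReceiverSketch(object):
        """Remember the outcome for each request key so a retried request is
        answered from the cache instead of being processed a second time."""

        def __init__(self, handle_fn):
            self._handle_fn = handle_fn
            self._results = {}

        def on_put(self, key, request_body):
            if key in self._results:
                # A resend of something we have already handled: replay it.
                return self._results[key]
            result = self._handle_fn(request_body)
            self._results[key] = result
            return result


    if __name__ == "__main__":
        processed = []

        def handle(body):
            processed.append(body)
            return (200, {})

        receiver = IdempotentReceiverSketch(handle)
        print(receiver.on_put("$some_event_id", {"event": "..."}))
        print(receiver.on_put("$some_event_id", {"event": "..."}))  # replayed
        print(len(processed))  # 1: the event was only handled once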
from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.room import RoomStore +from synapse.storage.room import RoomWorkerStore from ._slaved_id_tracker import SlavedIdTracker -class RoomStore(BaseSlavedStore): +class RoomStore(RoomWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(RoomStore, self).__init__(db_conn, hs) self._public_room_id_gen = SlavedIdTracker( db_conn, "public_room_list_stream", "stream_id" ) - get_public_room_ids = DataStore.get_public_room_ids.__func__ - get_current_public_room_stream_id = ( - DataStore.get_current_public_room_stream_id.__func__ - ) - get_public_room_ids_at_stream_id = ( - RoomStore.__dict__["get_public_room_ids_at_stream_id"] - ) - get_public_room_ids_at_stream_id_txn = ( - DataStore.get_public_room_ids_at_stream_id_txn.__func__ - ) - get_published_at_stream_id_txn = ( - DataStore.get_published_at_stream_id_txn.__func__ - ) - get_public_room_changes = DataStore.get_public_room_changes.__func__ + def get_current_public_room_stream_id(self): + return self._public_room_id_gen.get_current_token() def stream_positions(self): result = super(RoomStore, self).stream_positions() diff --git a/synapse/storage/room.py b/synapse/storage/room.py index fff6652e05..7f2c08d7a6 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -16,6 +16,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError +from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -38,7 +39,126 @@ RatelimitOverride = collections.namedtuple( ) -class RoomStore(SearchStore): +class RoomWorkerStore(SQLBaseStore): + def get_public_room_ids(self): + return self._simple_select_onecol( + table="rooms", + keyvalues={ + "is_public": True, + }, + retcol="room_id", + desc="get_public_room_ids", + ) + + @cached(num_args=2, max_entries=100) + def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): + """Get pulbic rooms for a particular list, or across all lists. + + Args: + stream_id (int) + network_tuple (ThirdPartyInstanceID): The list to use (None, None) + means the main list, None means all lsits. + """ + return self.runInteraction( + "get_public_room_ids_at_stream_id", + self.get_public_room_ids_at_stream_id_txn, + stream_id, network_tuple=network_tuple + ) + + def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, + network_tuple): + return { + rm + for rm, vis in self.get_published_at_stream_id_txn( + txn, stream_id, network_tuple=network_tuple + ).items() + if vis + } + + def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): + if network_tuple: + # We want to get from a particular list. No aggregation required. + + sql = (""" + SELECT room_id, visibility FROM public_room_list_stream + INNER JOIN ( + SELECT room_id, max(stream_id) AS stream_id + FROM public_room_list_stream + WHERE stream_id <= ? %s + GROUP BY room_id + ) grouped USING (room_id, stream_id) + """) + + if network_tuple.appservice_id is not None: + txn.execute( + sql % ("AND appservice_id = ? 
AND network_id = ?",), + (stream_id, network_tuple.appservice_id, network_tuple.network_id,) + ) + else: + txn.execute( + sql % ("AND appservice_id IS NULL",), + (stream_id,) + ) + return dict(txn) + else: + # We want to get from all lists, so we need to aggregate the results + + logger.info("Executing full list") + + sql = (""" + SELECT room_id, visibility + FROM public_room_list_stream + INNER JOIN ( + SELECT + room_id, max(stream_id) AS stream_id, appservice_id, + network_id + FROM public_room_list_stream + WHERE stream_id <= ? + GROUP BY room_id, appservice_id, network_id + ) grouped USING (room_id, stream_id) + """) + + txn.execute( + sql, + (stream_id,) + ) + + results = {} + # A room is visible if its visible on any list. + for room_id, visibility in txn: + results[room_id] = bool(visibility) or results.get(room_id, False) + + return results + + def get_public_room_changes(self, prev_stream_id, new_stream_id, + network_tuple): + def get_public_room_changes_txn(txn): + then_rooms = self.get_public_room_ids_at_stream_id_txn( + txn, prev_stream_id, network_tuple + ) + + now_rooms_dict = self.get_published_at_stream_id_txn( + txn, new_stream_id, network_tuple + ) + + now_rooms_visible = set( + rm for rm, vis in now_rooms_dict.items() if vis + ) + now_rooms_not_visible = set( + rm for rm, vis in now_rooms_dict.items() if not vis + ) + + newly_visible = now_rooms_visible - then_rooms + newly_unpublished = now_rooms_not_visible & then_rooms + + return newly_visible, newly_unpublished + + return self.runInteraction( + "get_public_room_changes", get_public_room_changes_txn + ) + + +class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def store_room(self, room_id, room_creator_user_id, is_public): @@ -225,16 +345,6 @@ class RoomStore(SearchStore): ) self.hs.get_notifier().on_new_replication_data() - def get_public_room_ids(self): - return self._simple_select_onecol( - table="rooms", - keyvalues={ - "is_public": True, - }, - retcol="room_id", - desc="get_public_room_ids", - ) - def get_room_count(self): """Retrieve a list of all rooms """ @@ -326,113 +436,6 @@ class RoomStore(SearchStore): def get_current_public_room_stream_id(self): return self._public_room_id_gen.get_current_token() - @cached(num_args=2, max_entries=100) - def get_public_room_ids_at_stream_id(self, stream_id, network_tuple): - """Get pulbic rooms for a particular list, or across all lists. - - Args: - stream_id (int) - network_tuple (ThirdPartyInstanceID): The list to use (None, None) - means the main list, None means all lsits. - """ - return self.runInteraction( - "get_public_room_ids_at_stream_id", - self.get_public_room_ids_at_stream_id_txn, - stream_id, network_tuple=network_tuple - ) - - def get_public_room_ids_at_stream_id_txn(self, txn, stream_id, - network_tuple): - return { - rm - for rm, vis in self.get_published_at_stream_id_txn( - txn, stream_id, network_tuple=network_tuple - ).items() - if vis - } - - def get_published_at_stream_id_txn(self, txn, stream_id, network_tuple): - if network_tuple: - # We want to get from a particular list. No aggregation required. - - sql = (""" - SELECT room_id, visibility FROM public_room_list_stream - INNER JOIN ( - SELECT room_id, max(stream_id) AS stream_id - FROM public_room_list_stream - WHERE stream_id <= ? %s - GROUP BY room_id - ) grouped USING (room_id, stream_id) - """) - - if network_tuple.appservice_id is not None: - txn.execute( - sql % ("AND appservice_id = ? 
AND network_id = ?",), - (stream_id, network_tuple.appservice_id, network_tuple.network_id,) - ) - else: - txn.execute( - sql % ("AND appservice_id IS NULL",), - (stream_id,) - ) - return dict(txn) - else: - # We want to get from all lists, so we need to aggregate the results - - logger.info("Executing full list") - - sql = (""" - SELECT room_id, visibility - FROM public_room_list_stream - INNER JOIN ( - SELECT - room_id, max(stream_id) AS stream_id, appservice_id, - network_id - FROM public_room_list_stream - WHERE stream_id <= ? - GROUP BY room_id, appservice_id, network_id - ) grouped USING (room_id, stream_id) - """) - - txn.execute( - sql, - (stream_id,) - ) - - results = {} - # A room is visible if its visible on any list. - for room_id, visibility in txn: - results[room_id] = bool(visibility) or results.get(room_id, False) - - return results - - def get_public_room_changes(self, prev_stream_id, new_stream_id, - network_tuple): - def get_public_room_changes_txn(txn): - then_rooms = self.get_public_room_ids_at_stream_id_txn( - txn, prev_stream_id, network_tuple - ) - - now_rooms_dict = self.get_published_at_stream_id_txn( - txn, new_stream_id, network_tuple - ) - - now_rooms_visible = set( - rm for rm, vis in now_rooms_dict.items() if vis - ) - now_rooms_not_visible = set( - rm for rm, vis in now_rooms_dict.items() if not vis - ) - - newly_visible = now_rooms_visible - then_rooms - newly_unpublished = now_rooms_not_visible & then_rooms - - return newly_visible, newly_unpublished - - return self.runInteraction( - "get_public_room_changes", get_public_room_changes_txn - ) - def get_all_new_public_rooms(self, prev_id, current_id, limit): def get_all_new_public_rooms(txn): sql = (""" From a9a2d66cdd0abc2339641808698e63cb06c4a038 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:16:02 +0000 Subject: [PATCH 254/348] Split out SignatureStore and EventFederationStore --- synapse/replication/slave/storage/events.py | 50 +--- synapse/storage/event_federation.py | 264 ++++++++++---------- synapse/storage/signatures.py | 8 +- 3 files changed, 148 insertions(+), 174 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f437..d2495ff994 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -17,13 +17,13 @@ import logging from synapse.api.constants import EventTypes from synapse.storage import DataStore -from synapse.storage.event_federation import EventFederationStore +from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamStore -from synapse.storage.signatures import SignatureStore +from synapse.storage.signatures import SignatureWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -40,8 +40,12 @@ logger = logging.getLogger(__name__) # the method descriptor on the DataStore and chuck them into our class. 
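These store-splitting patches replace the old trick of lifting unbound methods off DataStore with an ordinary class hierarchy: a read-only *WorkerStore base, a master store that subclasses it and adds the writers, and a slaved store that simply inherits the base. A toy version of that shape, with invented class names and an in-memory dict standing in for the database:

    class RoomWorkerStoreSketch(object):
        """Read-only queries, usable on both the master and the workers."""

        def __init__(self, rooms=None):
            self._rooms = dict(rooms or {})

        def get_public_room_ids(self):
            return sorted(r for r, is_public in self._rooms.items() if is_public)


    class RoomStoreSketch(RoomWorkerStoreSketch):
        """Master-only store: inherits the readers and adds the writers."""

        def store_room(self, room_id, is_public):
            self._rooms[room_id] = is_public


    class SlavedRoomStoreSketch(RoomWorkerStoreSketch):
        """Worker store: every read method arrives by plain inheritance."""


    if __name__ == "__main__":
        master = RoomStoreSketch()
        master.store_room("!a:example.org", True)
        print(master.get_public_room_ids())

        worker = SlavedRoomStoreSketch(rooms={"!a:example.org": True})
        print(worker.get_public_room_ids())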
-class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, +class SlavedEventStore(EventFederationWorkerStore, + RoomMemberWorkerStore, + EventPushActionsWorkerStore, + EventsWorkerStore, + StateGroupWorkerStore, + SignatureWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): @@ -72,9 +76,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. - get_latest_event_ids_in_room = EventFederationStore.__dict__[ - "get_latest_event_ids_in_room" - ] get_recent_event_ids_for_room = ( StreamStore.__dict__["get_recent_event_ids_for_room"] @@ -100,48 +101,13 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ - _get_backfill_events = DataStore._get_backfill_events.__func__ - get_missing_events = DataStore.get_missing_events.__func__ - _get_missing_events = DataStore._get_missing_events.__func__ - - get_auth_chain = DataStore.get_auth_chain.__func__ - get_auth_chain_ids = DataStore.get_auth_chain_ids.__func__ - _get_auth_chain_ids_txn = DataStore._get_auth_chain_ids_txn.__func__ - get_room_max_stream_ordering = DataStore.get_room_max_stream_ordering.__func__ - get_forward_extremeties_for_room = ( - DataStore.get_forward_extremeties_for_room.__func__ - ) - _get_forward_extremeties_for_room = ( - EventFederationStore.__dict__["_get_forward_extremeties_for_room"] - ) - get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ get_federation_out_pos = DataStore.get_federation_out_pos.__func__ update_federation_out_pos = DataStore.update_federation_out_pos.__func__ - get_latest_event_ids_and_hashes_in_room = ( - DataStore.get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_latest_event_ids_and_hashes_in_room = ( - DataStore._get_latest_event_ids_and_hashes_in_room.__func__ - ) - _get_event_reference_hashes_txn = ( - DataStore._get_event_reference_hashes_txn.__func__ - ) - add_event_hashes = ( - DataStore.add_event_hashes.__func__ - ) - get_event_reference_hashes = ( - SignatureStore.__dict__["get_event_reference_hashes"] - ) - get_event_reference_hash = ( - SignatureStore.__dict__["get_event_reference_hash"] - ) - def stream_positions(self): result = super(SlavedEventStore, self).stream_positions() result["events"] = self._stream_id_gen.get_current_token() diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 55a05c59d5..00ee82d300 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -15,7 +15,10 @@ from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore +from synapse.storage.signatures import SignatureWorkerStore + from synapse.api.errors import StoreError from synapse.util.caches.descriptors import cached from unpaddedbase64 import encode_base64 @@ -27,30 +30,8 @@ from Queue import PriorityQueue, Empty logger = logging.getLogger(__name__) -class EventFederationStore(SQLBaseStore): - """ Responsible for storing and serving up the various graphs associated - with an event. Including the main event graph and the auth chains for an - event. 
- - Also has methods for getting the front (latest) and back (oldest) edges - of the event graphs. These are used to generate the parents for new events - and backfilling from another server respectively. - """ - - EVENT_AUTH_STATE_ONLY = "event_auth_state_only" - - def __init__(self, db_conn, hs): - super(EventFederationStore, self).__init__(db_conn, hs) - - self.register_background_update_handler( - self.EVENT_AUTH_STATE_ONLY, - self._background_delete_non_state_event_auth, - ) - - hs.get_clock().looping_call( - self._delete_old_forward_extrem_cache, 60 * 60 * 1000 - ) - +class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, + SQLBaseStore): def get_auth_chain(self, event_ids, include_given=False): """Get auth events for given event_ids. The events *must* be state events. @@ -228,88 +209,6 @@ class EventFederationStore(SQLBaseStore): return int(min_depth) if min_depth is not None else None - def _update_min_depth_for_room_txn(self, txn, room_id, depth): - min_depth = self._get_min_depth_interaction(txn, room_id) - - if min_depth and depth >= min_depth: - return - - self._simple_upsert_txn( - txn, - table="room_depth", - keyvalues={ - "room_id": room_id, - }, - values={ - "min_depth": depth, - }, - ) - - def _handle_mult_prev_events(self, txn, events): - """ - For the given event, update the event edges table and forward and - backward extremities tables. - """ - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id, _ in ev.prev_events - ], - ) - - self._update_backward_extremeties(txn, events) - - def _update_backward_extremeties(self, txn, events): - """Updates the event_backward_extremities tables based on the new/updated - events being persisted. - - This is called for new events *and* for events that were outliers, but - are now being persisted as non-outliers. - - Forward extremities are handled when we first start persisting the events. - """ - events_by_room = {} - for ev in events: - events_by_room.setdefault(ev.room_id, []).append(ev) - - query = ( - "INSERT INTO event_backward_extremities (event_id, room_id)" - " SELECT ?, ? WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - " )" - " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" - " )" - ) - - txn.executemany(query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) - for ev in events for e_id, _ in ev.prev_events - if not ev.internal_metadata.is_outlier() - ]) - - query = ( - "DELETE FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - ) - txn.executemany( - query, - [ - (ev.event_id, ev.room_id) for ev in events - if not ev.internal_metadata.is_outlier() - ] - ) - def get_forward_extremeties_for_room(self, room_id, stream_ordering): """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -371,28 +270,6 @@ class EventFederationStore(SQLBaseStore): get_forward_extremeties_for_room_txn ) - def _delete_old_forward_extrem_cache(self): - def _delete_old_forward_extrem_cache_txn(txn): - # Delete entries older than a month, while making sure we don't delete - # the only entries for a room. - sql = (""" - DELETE FROM stream_ordering_to_exterm - WHERE - room_id IN ( - SELECT room_id - FROM stream_ordering_to_exterm - WHERE stream_ordering > ? 
- ) AND stream_ordering < ? - """) - txn.execute( - sql, - (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) - ) - return self.runInteraction( - "_delete_old_forward_extrem_cache", - _delete_old_forward_extrem_cache_txn - ) - def get_backfill_events(self, room_id, event_list, limit): """Get a list of Events for a given topic that occurred before (and including) the events in event_list. Return a list of max size `limit` @@ -522,6 +399,135 @@ class EventFederationStore(SQLBaseStore): return event_results + +class EventFederationStore(EventFederationWorkerStore): + """ Responsible for storing and serving up the various graphs associated + with an event. Including the main event graph and the auth chains for an + event. + + Also has methods for getting the front (latest) and back (oldest) edges + of the event graphs. These are used to generate the parents for new events + and backfilling from another server respectively. + """ + + EVENT_AUTH_STATE_ONLY = "event_auth_state_only" + + def __init__(self, db_conn, hs): + super(EventFederationStore, self).__init__(db_conn, hs) + + self.register_background_update_handler( + self.EVENT_AUTH_STATE_ONLY, + self._background_delete_non_state_event_auth, + ) + + hs.get_clock().looping_call( + self._delete_old_forward_extrem_cache, 60 * 60 * 1000 + ) + + def _update_min_depth_for_room_txn(self, txn, room_id, depth): + min_depth = self._get_min_depth_interaction(txn, room_id) + + if min_depth and depth >= min_depth: + return + + self._simple_upsert_txn( + txn, + table="room_depth", + keyvalues={ + "room_id": room_id, + }, + values={ + "min_depth": depth, + }, + ) + + def _handle_mult_prev_events(self, txn, events): + """ + For the given event, update the event edges table and forward and + backward extremities tables. + """ + self._simple_insert_many_txn( + txn, + table="event_edges", + values=[ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id, _ in ev.prev_events + ], + ) + + self._update_backward_extremeties(txn, events) + + def _update_backward_extremeties(self, txn, events): + """Updates the event_backward_extremities tables based on the new/updated + events being persisted. + + This is called for new events *and* for events that were outliers, but + are now being persisted as non-outliers. + + Forward extremities are handled when we first start persisting the events. + """ + events_by_room = {} + for ev in events: + events_by_room.setdefault(ev.room_id, []).append(ev) + + query = ( + "INSERT INTO event_backward_extremities (event_id, room_id)" + " SELECT ?, ? WHERE NOT EXISTS (" + " SELECT 1 FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + " )" + " AND NOT EXISTS (" + " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " + " AND outlier = ?" + " )" + ) + + txn.executemany(query, [ + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + for ev in events for e_id, _ in ev.prev_events + if not ev.internal_metadata.is_outlier() + ]) + + query = ( + "DELETE FROM event_backward_extremities" + " WHERE event_id = ? AND room_id = ?" + ) + txn.executemany( + query, + [ + (ev.event_id, ev.room_id) for ev in events + if not ev.internal_metadata.is_outlier() + ] + ) + + def _delete_old_forward_extrem_cache(self): + def _delete_old_forward_extrem_cache_txn(txn): + # Delete entries older than a month, while making sure we don't delete + # the only entries for a room. 
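The query being moved here (visible in the removed hunk above and re-added just below) prunes cache rows older than a cutoff, but only for rooms that also have a newer row, so no room is ever left without an entry. A self-contained sqlite3 illustration of that shape; the table mirrors the one in the query, the data is made up:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE stream_ordering_to_exterm (room_id TEXT, stream_ordering INTEGER)"
    )
    conn.executemany(
        "INSERT INTO stream_ordering_to_exterm VALUES (?, ?)",
        [
            ("!busy:example.org", 10),   # old entry, superseded below
            ("!busy:example.org", 500),  # recent entry
            ("!quiet:example.org", 20),  # old, but the room's only entry
        ],
    )

    cutoff = 100  # stands in for stream_ordering_month_ago
    conn.execute(
        """
        DELETE FROM stream_ordering_to_exterm
        WHERE
            room_id IN (
                SELECT room_id
                FROM stream_ordering_to_exterm
                WHERE stream_ordering > ?
            ) AND stream_ordering < ?
        """,
        (cutoff, cutoff),
    )

    # The stale row for the busy room is gone; the quiet room keeps its only row.
    print(conn.execute(
        "SELECT room_id, stream_ordering FROM stream_ordering_to_exterm ORDER BY room_id"
    ).fetchall())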
+ sql = (""" + DELETE FROM stream_ordering_to_exterm + WHERE + room_id IN ( + SELECT room_id + FROM stream_ordering_to_exterm + WHERE stream_ordering > ? + ) AND stream_ordering < ? + """) + txn.execute( + sql, + (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) + ) + return self.runInteraction( + "_delete_old_forward_extrem_cache", + _delete_old_forward_extrem_cache_txn + ) + def clean_room_for_join(self, room_id): return self.runInteraction( "clean_room_for_join", diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index 67d5d9969a..e6eeb1b641 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -22,9 +22,7 @@ from synapse.crypto.event_signing import compute_event_reference_hash from synapse.util.caches.descriptors import cached, cachedList -class SignatureStore(SQLBaseStore): - """Persistence for event signatures and hashes""" - +class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): return self._get_event_reference_hashes_txn(event_id) @@ -74,6 +72,10 @@ class SignatureStore(SQLBaseStore): txn.execute(query, (event_id, )) return {k: v for k, v in txn} + +class SignatureStore(SignatureWorkerStore): + """Persistence for event signatures and hashes""" + def _store_event_reference_hashes_txn(self, txn, events): """Store a hash for a PDU Args: From 6411f725bedbc4701e9c624ae23f47d52ff0bd7c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 14:05:41 +0000 Subject: [PATCH 255/348] Calculate stream_ordering_month_ago correctly on workers --- synapse/replication/slave/storage/events.py | 1 - synapse/storage/__init__.py | 15 -- synapse/storage/event_push_actions.py | 149 +++++++++++--------- 3 files changed, 85 insertions(+), 80 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f437..a4d7430f9b 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -67,7 +67,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "MembershipStreamChangeCache", events_max, ) - self.stream_ordering_month_ago = 0 self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a06..b3cdcfdc21 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -20,7 +20,6 @@ from synapse.storage.devices import DeviceStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore ) -from ._base import LoggingTransaction from .directory import DirectoryStore from .events import EventsStore from .presence import PresenceStore, UserPresenceState @@ -228,20 +227,6 @@ class DataStore(RoomMemberStore, RoomStore, prefilled_cache=_group_updates_prefill, ) - cur = LoggingTransaction( - db_conn.cursor(), - name="_find_stream_orderings_for_times_txn", - database_engine=self.database_engine, - after_callbacks=[], - final_callbacks=[], - ) - self._find_stream_orderings_for_times_txn(cur) - cur.close() - - self.find_stream_orderings_looping_call = self._clock.looping_call( - self._find_stream_orderings_for_times, 10 * 60 * 1000 - ) - self._stream_order_on_start = self.get_room_max_stream_ordering() self._min_stream_order_on_start = self.get_room_min_stream_ordering() diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py 
index 6454045c2d..c08bebe112 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore, LoggingTransaction from twisted.internet import defer from synapse.util.async import sleep from synapse.util.caches.descriptors import cachedInlineCallbacks @@ -64,6 +64,27 @@ def _deserialize_action(actions, is_highlight): class EventPushActionsWorkerStore(SQLBaseStore): + def __init__(self, db_conn, hs): + super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) + + # These get correctly ste by _find_stream_orderings_for_times_txn + self.stream_ordering_month_ago = 0 + self.stream_ordering_day_ago = 0 + + cur = LoggingTransaction( + db_conn.cursor(), + name="_find_stream_orderings_for_times_txn", + database_engine=self.database_engine, + after_callbacks=[], + final_callbacks=[], + ) + self._find_stream_orderings_for_times_txn(cur) + cur.close() + + self.find_stream_orderings_looping_call = self._clock.looping_call( + self._find_stream_orderings_for_times, 10 * 60 * 1000 + ) + @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( self, room_id, user_id, last_read_event_id @@ -443,6 +464,69 @@ class EventPushActionsWorkerStore(SQLBaseStore): desc="remove_push_actions_from_staging", ) + @defer.inlineCallbacks + def _find_stream_orderings_for_times(self): + yield self.runInteraction( + "_find_stream_orderings_for_times", + self._find_stream_orderings_for_times_txn + ) + + def _find_stream_orderings_for_times_txn(self, txn): + logger.info("Searching for stream ordering 1 month ago") + self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 month ago: it's %d", + self.stream_ordering_month_ago + ) + logger.info("Searching for stream ordering 1 day ago") + self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( + txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 + ) + logger.info( + "Found stream ordering 1 day ago: it's %d", + self.stream_ordering_day_ago + ) + + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + """ + Find the stream_ordering of the first event that was received after + a given timestamp. This is relatively slow as there is no index on + received_ts but we can then use this to delete push actions before + this. + + received_ts must necessarily be in the same order as stream_ordering + and stream_ordering is indexed, so we manually binary search using + stream_ordering + """ + txn.execute("SELECT MAX(stream_ordering) FROM events") + max_stream_ordering = txn.fetchone()[0] + + if max_stream_ordering is None: + return 0 + + range_start = 0 + range_end = max_stream_ordering + + sql = ( + "SELECT received_ts FROM events" + " WHERE stream_ordering > ?" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + + while range_end - range_start > 1: + middle = int((range_end + range_start) / 2) + txn.execute(sql, (middle,)) + middle_ts = txn.fetchone()[0] + if ts > middle_ts: + range_start = middle + else: + range_end = middle + + return range_end + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" @@ -650,69 +734,6 @@ class EventPushActionsStore(EventPushActionsWorkerStore): WHERE room_id = ? 
AND user_id = ? AND stream_ordering <= ? """, (room_id, user_id, stream_ordering)) - @defer.inlineCallbacks - def _find_stream_orderings_for_times(self): - yield self.runInteraction( - "_find_stream_orderings_for_times", - self._find_stream_orderings_for_times_txn - ) - - def _find_stream_orderings_for_times_txn(self, txn): - logger.info("Searching for stream ordering 1 month ago") - self.stream_ordering_month_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 30 * 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 month ago: it's %d", - self.stream_ordering_month_ago - ) - logger.info("Searching for stream ordering 1 day ago") - self.stream_ordering_day_ago = self._find_first_stream_ordering_after_ts_txn( - txn, self._clock.time_msec() - 24 * 60 * 60 * 1000 - ) - logger.info( - "Found stream ordering 1 day ago: it's %d", - self.stream_ordering_day_ago - ) - - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): - """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before - this. - - received_ts must necessarily be in the same order as stream_ordering - and stream_ordering is indexed, so we manually binary search using - stream_ordering - """ - txn.execute("SELECT MAX(stream_ordering) FROM events") - max_stream_ordering = txn.fetchone()[0] - - if max_stream_ordering is None: - return 0 - - range_start = 0 - range_end = max_stream_ordering - - sql = ( - "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" - " ORDER BY stream_ordering" - " LIMIT 1" - ) - - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) - txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] - if ts > middle_ts: - range_start = middle - else: - range_end = middle - - return range_end - @defer.inlineCallbacks def _rotate_notifs(self): if self._doing_notif_rotation or self.stream_ordering_day_ago is None: From 784f036306a020fcde495887c2881209b913b9b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:54:37 +0000 Subject: [PATCH 256/348] Move RoomMemberHandler out of Handlers --- synapse/handlers/__init__.py | 2 - synapse/handlers/_base.py | 2 +- synapse/handlers/federation.py | 4 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 52 ++++++++++++++---------- synapse/rest/client/v1/admin.py | 7 ++-- synapse/rest/client/v1/room.py | 19 +++++---- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 5 +++ tests/rest/client/v1/test_typing.py | 2 +- 11 files changed, 57 insertions(+), 44 deletions(-) diff --git a/synapse/handlers/__init__.py b/synapse/handlers/__init__.py index 53213cdccf..8f8fd82eb0 100644 --- a/synapse/handlers/__init__.py +++ b/synapse/handlers/__init__.py @@ -17,7 +17,6 @@ from .register import RegistrationHandler from .room import ( RoomCreationHandler, RoomContextHandler, ) -from .room_member import RoomMemberHandler from .message import MessageHandler from .federation import FederationHandler from .directory import DirectoryHandler @@ -49,7 +48,6 @@ class Handlers(object): self.registration_handler = RegistrationHandler(hs) self.message_handler = MessageHandler(hs) self.room_creation_handler = RoomCreationHandler(hs) - self.room_member_handler = RoomMemberHandler(hs) self.federation_handler = FederationHandler(hs) self.directory_handler = DirectoryHandler(hs) 
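The _find_first_stream_ordering_after_ts_txn helper relocated in the previous patch cannot use an index on received_ts, so it binary-searches over the indexed stream_ordering column instead, relying on the two columns increasing together. The same search over a plain Python list, with invented sample data:

    def first_ordering_after_ts(rows, ts):
        """rows: list of (stream_ordering, received_ts) sorted by stream_ordering,
        with received_ts non-decreasing. Returns the smallest stream_ordering
        whose received_ts is strictly greater than ts, or None if there is none."""
        lo, hi = 0, len(rows)
        while lo < hi:
            mid = (lo + hi) // 2
            if rows[mid][1] > ts:
                hi = mid
            else:
                lo = mid + 1
        return rows[lo][0] if lo < len(rows) else None


    if __name__ == "__main__":
        rows = [(1, 100), (2, 150), (3, 150), (4, 400), (5, 900)]
        print(first_ordering_after_ts(rows, 150))    # -> 4
        print(first_ordering_after_ts(rows, 1000))   # -> None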
self.admin_handler = AdminHandler(hs) diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index faa5609c0c..e089e66fde 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -158,7 +158,7 @@ class BaseHandler(object): # homeserver. requester = synapse.types.create_requester( target_user, is_guest=True) - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() yield handler.update_membership( requester, target_user, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8832ba58bc..520612683e 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2153,7 +2153,7 @@ class FederationHandler(BaseHandler): raise e yield self._check_signature(event, context) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) else: destinations = set(x.split(":", 1)[-1] for x in (sender_user_id, room_id)) @@ -2197,7 +2197,7 @@ class FederationHandler(BaseHandler): # TODO: Make sure the signatures actually are correct. event.signatures.update(returned_invite.signatures) - member_handler = self.hs.get_handlers().room_member_handler + member_handler = self.hs.get_room_member_handler() yield member_handler.send_membership_event(None, event, context) @defer.inlineCallbacks diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 9800e24453..c9c2879038 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -233,7 +233,7 @@ class ProfileHandler(BaseHandler): ) for room_id in room_ids: - handler = self.hs.get_handlers().room_member_handler + handler = self.hs.get_room_member_handler() try: # Assume the target_user isn't a guest, # because we don't let guests set profile or avatar data. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6ab020bf41..6c425828c1 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -165,7 +165,7 @@ class RoomCreationHandler(BaseHandler): creation_content = config.get("creation_content", {}) - room_member_handler = self.hs.get_handlers().room_member_handler + room_member_handler = self.hs.get_room_member_handler() yield self._send_events_for_new_room( requester, @@ -224,7 +224,7 @@ class RoomCreationHandler(BaseHandler): id_server = invite_3pid["id_server"] address = invite_3pid["address"] medium = invite_3pid["medium"] - yield self.hs.get_handlers().room_member_handler.do_3pid_invite( + yield self.hs.get_room_member_handler().do_3pid_invite( room_id, requester.user, medium, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 37dc5e99ab..0329432f5c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -30,24 +30,33 @@ from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room -from ._base import BaseHandler logger = logging.getLogger(__name__) id_server_scheme = "https://" -class RoomMemberHandler(BaseHandler): +class RoomMemberHandler(object): # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level # API that takes ID strings and returns pagination chunks. These concerns # ought to be separated out a lot better. 
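The hunks that follow capture the handler's collaborators once in __init__ (store, auth, federation handler, and so on) instead of reaching through self.hs at every call site, while callers switch to hs.get_room_member_handler(). A stripped-down sketch of that dependency-capture pattern; the classes here are illustrative stand-ins, not the real Synapse ones:

    class ProfileHandlerSketch(object):
        """Illustrative collaborator, not the real Synapse class."""

        def get_displayname(self, user_id):
            return "Alice" if user_id == "@alice:example.org" else None


    class HomeServerSketch(object):
        """Tiny stand-in for the HomeServer dependency container."""

        def get_profile_handler(self):
            return ProfileHandlerSketch()


    class RoomMemberHandlerSketch(object):
        """Capture collaborators once at construction time instead of
        reaching through `hs` at every call site; tests can then hand in
        fakes without building a full homeserver."""

        def __init__(self, hs):
            self.profile_handler = hs.get_profile_handler()

        def displayname_for_invite(self, user_id):
            return self.profile_handler.get_displayname(user_id)


    if __name__ == "__main__":
        handler = RoomMemberHandlerSketch(HomeServerSketch())
        print(handler.displayname_for_invite("@alice:example.org"))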
def __init__(self, hs): - super(RoomMemberHandler, self).__init__(hs) + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.state_handler = hs.get_state_handler() + self.config = hs.config + self.is_mine = hs.is_mine + self.is_mine_id = hs.is_mine_id + self.simple_http_client = hs.get_simple_http_client() + self.federation_handler = hs.get_handlers().federation_handler + self.directory_handler = hs.get_handlers().directory_handler + self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() + self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -138,7 +147,7 @@ class RoomMemberHandler(BaseHandler): # join dance for now, since we're kinda implicitly checking # that we are allowed to join when we decide whether or not we # need to do the invite/join dance. - yield self.hs.get_handlers().federation_handler.do_invite_join( + yield self.federation_handler.do_invite_join( remote_room_hosts, room_id, user.to_string(), @@ -204,8 +213,7 @@ class RoomMemberHandler(BaseHandler): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. if third_party_signed is not None: - replication = self.hs.get_replication_layer() - yield replication.exchange_third_party_invite( + yield self.replication_layer.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, @@ -226,7 +234,7 @@ class RoomMemberHandler(BaseHandler): requester.user, ) if not is_requester_admin: - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: logger.info( "Blocking invite: user is not admin and non-admin " "invites disabled" @@ -286,7 +294,7 @@ class RoomMemberHandler(BaseHandler): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.hs.is_mine(inviter): + if inviter and not self.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -311,7 +319,7 @@ class RoomMemberHandler(BaseHandler): if not inviter: raise SynapseError(404, "Not a known room") - if self.hs.is_mine(inviter): + if self.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -321,7 +329,7 @@ class RoomMemberHandler(BaseHandler): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.hs.get_handlers().federation_handler + fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( remote_room_hosts, @@ -393,7 +401,7 @@ class RoomMemberHandler(BaseHandler): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -477,7 +485,7 @@ class RoomMemberHandler(BaseHandler): Raises: SynapseError if room alias could not be found. 
""" - directory_handler = self.hs.get_handlers().directory_handler + directory_handler = self.directory_handler mapping = yield directory_handler.get_association(room_alias) if not mapping: @@ -508,7 +516,7 @@ class RoomMemberHandler(BaseHandler): requester, txn_id ): - if self.hs.config.block_non_admin_invites: + if self.config.block_non_admin_invites: is_requester_admin = yield self.auth.is_server_admin( requester.user, ) @@ -555,7 +563,7 @@ class RoomMemberHandler(BaseHandler): str: the matrix ID of the 3pid, or None if it is not recognized. """ try: - data = yield self.hs.get_simple_http_client().get_json( + data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/lookup" % (id_server_scheme, id_server,), { "medium": medium, @@ -578,7 +586,7 @@ class RoomMemberHandler(BaseHandler): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): - key_data = yield self.hs.get_simple_http_client().get_json( + key_data = yield self.simple_http_client.get_json( "%s%s/_matrix/identity/api/v1/pubkey/%s" % (id_server_scheme, server_hostname, key_name,), ) @@ -603,7 +611,7 @@ class RoomMemberHandler(BaseHandler): user, txn_id ): - room_state = yield self.hs.get_state_handler().get_current_state(room_id) + room_state = yield self.state_handler.get_current_state(room_id) inviter_display_name = "" inviter_avatar_url = "" @@ -727,15 +735,15 @@ class RoomMemberHandler(BaseHandler): "sender_avatar_url": inviter_avatar_url, } - if self.hs.config.invite_3pid_guest: - registration_handler = self.hs.get_handlers().registration_handler + if self.config.invite_3pid_guest: + registration_handler = self.registration_handler guest_access_token = yield registration_handler.guest_access_token_for( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.hs.get_auth().get_user_by_access_token( + guest_user_info = yield self.auth.get_user_by_access_token( guest_access_token ) @@ -744,7 +752,7 @@ class RoomMemberHandler(BaseHandler): "guest_user_id": guest_user_info["user"].to_string(), }) - data = yield self.hs.get_simple_http_client().post_urlencoded_get_json( + data = yield self.simple_http_client.post_urlencoded_get_json( is_url, invite_config ) @@ -793,10 +801,10 @@ class RoomMemberHandler(BaseHandler): # first member event? 
create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.hs.is_mine_id(create_event_id)) + defer.returnValue(self.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): + if etype != EventTypes.Member or not self.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 6073cc6fa2..3917eee42d 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -180,6 +180,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): self.handlers = hs.get_handlers() self.state = hs.get_state_handler() self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() @defer.inlineCallbacks def on_POST(self, request, room_id): @@ -238,7 +239,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): logger.info("Kicking %r from %r...", user_id, room_id) target_requester = create_requester(user_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=room_id, @@ -247,9 +248,9 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ratelimit=False ) - yield self.handlers.room_member_handler.forget(target_requester.user, room_id) + yield self.room_member_handler.forget(target_requester.user, room_id) - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=target_requester, target=target_requester.user, room_id=new_room_id, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 817fd47842..9d745174c7 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -84,6 +84,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): super(RoomStateEventRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() self.event_creation_hander = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /room/$roomid/state/$eventtype @@ -156,7 +157,7 @@ class RoomStateEventRestServlet(ClientV1RestServlet): if event_type == EventTypes.Member: membership = content.get("membership", None) - event = yield self.handlers.room_member_handler.update_membership( + event = yield self.room_member_handler.update_membership( requester, target=UserID.from_string(state_key), room_id=room_id, @@ -229,7 +230,7 @@ class RoomSendEventRestServlet(ClientV1RestServlet): class JoinRoomAliasServlet(ClientV1RestServlet): def __init__(self, hs): super(JoinRoomAliasServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /join/$room_identifier[/$txn_id] @@ -257,7 +258,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): except Exception: remote_room_hosts = None elif RoomAlias.is_valid(room_identifier): - handler = self.handlers.room_member_handler + handler = self.room_member_handler room_alias = RoomAlias.from_string(room_identifier) room_id, remote_room_hosts = yield handler.lookup_room_alias(room_alias) room_id = room_id.to_string() @@ -266,7 +267,7 @@ class JoinRoomAliasServlet(ClientV1RestServlet): room_identifier, )) - yield self.handlers.room_member_handler.update_membership( + 
yield self.room_member_handler.update_membership( requester=requester, target=requester.user, room_id=room_id, @@ -562,7 +563,7 @@ class RoomEventContextServlet(ClientV1RestServlet): class RoomForgetRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomForgetRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): PATTERNS = ("/rooms/(?P[^/]*)/forget") @@ -575,7 +576,7 @@ class RoomForgetRestServlet(ClientV1RestServlet): allow_guest=False, ) - yield self.handlers.room_member_handler.forget( + yield self.room_member_handler.forget( user=requester.user, room_id=room_id, ) @@ -593,7 +594,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def __init__(self, hs): super(RoomMembershipRestServlet, self).__init__(hs) - self.handlers = hs.get_handlers() + self.room_member_handler = hs.get_room_member_handler() def register(self, http_server): # /rooms/$roomid/[invite|join|leave] @@ -622,7 +623,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): content = {} if membership_action == "invite" and self._has_3pid_invite_keys(content): - yield self.handlers.room_member_handler.do_3pid_invite( + yield self.room_member_handler.do_3pid_invite( room_id, requester.user, content["medium"], @@ -644,7 +645,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): if 'reason' in content and membership_action in ['kick', 'ban']: event_content = {'reason': content['reason']} - yield self.handlers.room_member_handler.update_membership( + yield self.room_member_handler.update_membership( requester=requester, target=target, room_id=room_id, diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index c6f4680a76..0ba62bddc1 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -183,7 +183,7 @@ class RegisterRestServlet(RestServlet): self.auth_handler = hs.get_auth_handler() self.registration_handler = hs.get_handlers().registration_handler self.identity_handler = hs.get_handlers().identity_handler - self.room_member_handler = hs.get_handlers().room_member_handler + self.room_member_handler = hs.get_room_member_handler() self.device_handler = hs.get_device_handler() self.macaroon_gen = hs.get_macaroon_generator() diff --git a/synapse/server.py b/synapse/server.py index fbd602d40e..5b6effbe31 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -45,6 +45,7 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler +from synapse.handlers.room_member import RoomMemberHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -145,6 +146,7 @@ class HomeServer(object): 'groups_attestation_signing', 'groups_attestation_renewer', 'spam_checker', + 'room_member_handler', ] def __init__(self, hostname, **kwargs): @@ -382,6 +384,9 @@ class HomeServer(object): def build_spam_checker(self): return SpamChecker(self) + def build_room_member_handler(self): + return RoomMemberHandler(self) + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index a269e6f56e..e46534cd35 100644 --- 
a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -95,7 +95,7 @@ class RoomTypingTestCase(RestTestCase): else: if remotedomains is not None: remotedomains.add(member.domain) - hs.get_handlers().room_member_handler.fetch_room_distributions_into = ( + hs.get_room_member_handler().fetch_room_distributions_into = ( fetch_room_distributions_into ) From f793bc38770caf81dc34b9033d7dd2c9bfc0d79b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 13:56:03 +0000 Subject: [PATCH 257/348] Split out stream store --- synapse/replication/slave/storage/events.py | 54 +-- synapse/storage/__init__.py | 8 - synapse/storage/stream.py | 350 +++++++++++--------- 3 files changed, 202 insertions(+), 210 deletions(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index de0b26f437..517a9f0ec6 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -22,9 +22,8 @@ from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore -from synapse.storage.stream import StreamStore +from synapse.storage.stream import StreamWorkerStore from synapse.storage.signatures import SignatureStore -from synapse.util.caches.stream_change_cache import StreamChangeCache from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -41,34 +40,20 @@ logger = logging.getLogger(__name__) class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, - EventsWorkerStore, StateGroupWorkerStore, + StreamWorkerStore, EventsWorkerStore, StateGroupWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): - super(SlavedEventStore, self).__init__(db_conn, hs) self._stream_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", ) self._backfill_id_gen = SlavedIdTracker( db_conn, "events", "stream_ordering", step=-1 ) - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._events_stream_cache = StreamChangeCache( - "EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) + + super(SlavedEventStore, self).__init__(db_conn, hs) self.stream_ordering_month_ago = 0 - self._stream_order_on_start = self.get_room_max_stream_ordering() # Cached functions can't be accessed through a class instance so we need # to reach inside the __dict__ to extract them. 
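The comment kept in the hunk above refers to the legacy reuse trick these patches are steadily removing: a @cached method is a descriptor, so ordinary attribute access on an instance hands back a bound wrapper, and the only way to transplant the caching machinery onto another class was to pull the raw descriptor out of the defining class's __dict__. A toy caching descriptor makes the point; cached_method below is a simplified stand-in, not Synapse's real decorator:

    import functools


    class cached_method(object):
        """A minimal caching descriptor standing in for Synapse's @cached.

        Attribute access on an instance goes through __get__ and returns a
        wrapper bound to that instance, so reusing the underlying machinery
        on another class means lifting the raw descriptor out of the
        defining class's __dict__.
        """

        def __init__(self, func):
            self.func = func

        def __get__(self, obj, objtype=None):
            @functools.wraps(self.func)
            def wrapper(*args):
                cache = obj.__dict__.setdefault("_cache", {})
                key = (self.func.__name__,) + args
                if key not in cache:
                    cache[key] = self.func(obj, *args)
                return cache[key]
            return wrapper


    class MainStore(object):
        @cached_method
        def get_thing(self, thing_id):
            return "thing-%s" % (thing_id,)


    class OldStyleSlavedStore(object):
        # The trick the comment above describes: reach into __dict__ for the
        # raw descriptor and install it on this class too.
        get_thing = MainStore.__dict__["get_thing"]


    if __name__ == "__main__":
        store = OldStyleSlavedStore()
        print(store.get_thing(1))   # -> thing-1
        print(store.get_thing(1))   # second call served from the instance cache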
@@ -76,30 +61,6 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, "get_latest_event_ids_in_room" ] - get_recent_event_ids_for_room = ( - StreamStore.__dict__["get_recent_event_ids_for_room"] - ) - has_room_changed_since = DataStore.has_room_changed_since.__func__ - - get_membership_changes_for_user = ( - DataStore.get_membership_changes_for_user.__func__ - ) - get_room_events_max_id = DataStore.get_room_events_max_id.__func__ - get_room_events_stream_for_room = ( - DataStore.get_room_events_stream_for_room.__func__ - ) - get_events_around = DataStore.get_events_around.__func__ - - get_recent_events_for_room = DataStore.get_recent_events_for_room.__func__ - get_room_events_stream_for_rooms = ( - DataStore.get_room_events_stream_for_rooms.__func__ - ) - get_stream_token_for_event = DataStore.get_stream_token_for_event.__func__ - - _set_before_and_after = staticmethod(DataStore._set_before_and_after) - - _get_events_around_txn = DataStore._get_events_around_txn.__func__ - get_backfill_events = DataStore.get_backfill_events.__func__ _get_backfill_events = DataStore._get_backfill_events.__func__ get_missing_events = DataStore.get_missing_events.__func__ @@ -120,8 +81,11 @@ class SlavedEventStore(RoomMemberWorkerStore, EventPushActionsWorkerStore, get_all_new_events_stream = DataStore.get_all_new_events_stream.__func__ - get_federation_out_pos = DataStore.get_federation_out_pos.__func__ - update_federation_out_pos = DataStore.update_federation_out_pos.__func__ + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() get_latest_event_ids_and_hashes_in_room = ( DataStore.get_latest_event_ids_and_hashes_in_room.__func__ diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0f136f8a06..0ce76d7a8c 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -148,14 +148,6 @@ class DataStore(RoomMemberStore, RoomStore, stream_column="stream_ordering", max_value=events_max, ) - self._events_stream_cache = StreamChangeCache( - "EventsRoomStreamChangeCache", min_event_val, - prefilled_cache=event_cache_prefill, - ) - - self._membership_stream_cache = StreamChangeCache( - "MembershipStreamChangeCache", events_max, - ) self._presence_on_startup = self._get_active_presence(db_conn) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 52bdce5be2..057f30db33 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -35,13 +35,17 @@ what sort order was used: from twisted.internet import defer -from ._base import SQLBaseStore +from synapse.storage._base import SQLBaseStore +from synapse.storage.events import EventsWorkerStore + from synapse.util.caches.descriptors import cached from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken +from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.storage.engines import PostgresEngine, Sqlite3Engine +import abc import logging @@ -143,81 +147,28 @@ def filter_to_clause(event_filter): return " AND ".join(clauses), args -class StreamStore(SQLBaseStore): - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. 
- if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE +class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + __metaclass__ = abc.ABCMeta - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) + def __init__(self, db_conn, hs): + super(StreamWorkerStore, self).__init__(db_conn, hs) - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. - rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True + events_max = self.get_room_max_stream_ordering() + event_cache_prefill, min_event_val = self._get_cache_dict( + db_conn, "events", + entity_column="room_id", + stream_column="stream_ordering", + max_value=events_max, + ) + self._events_stream_cache = StreamChangeCache( + "EventsRoomStreamChangeCache", min_event_val, + prefilled_cache=event_cache_prefill, + ) + self._membership_stream_cache = StreamChangeCache( + "MembershipStreamChangeCache", events_max, ) - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. - key = to_key - - defer.returnValue((ret, key)) + self._stream_order_on_start = self.get_room_max_stream_ordering() @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, @@ -380,88 +331,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(ret) - @defer.inlineCallbacks - def paginate_room_events(self, room_id, from_key, to_key=None, - direction='b', limit=-1, event_filter=None): - # Tokens really represent positions between elements, but we use - # the convention of pointing to the event before the gap. Hence - # we have a bit of asymmetry when it comes to equalities. 
- args = [False, room_id] - if direction == 'b': - order = "DESC" - bounds = upper_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, lower_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - else: - order = "ASC" - bounds = lower_bound( - RoomStreamToken.parse(from_key), self.database_engine - ) - if to_key: - bounds = "%s AND %s" % (bounds, upper_bound( - RoomStreamToken.parse(to_key), self.database_engine - )) - - filter_clause, filter_args = filter_to_clause(event_filter) - - if filter_clause: - bounds += " AND " + filter_clause - args.extend(filter_args) - - if int(limit) > 0: - args.append(int(limit)) - limit_str = " LIMIT ?" - else: - limit_str = "" - - sql = ( - "SELECT * FROM events" - " WHERE outlier = ? AND room_id = ? AND %(bounds)s" - " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s %(limit)s" - ) % { - "bounds": bounds, - "order": order, - "limit": limit_str - } - - def f(txn): - txn.execute(sql, args) - - rows = self.cursor_to_dict(txn) - - if rows: - topo = rows[-1]["topological_ordering"] - toke = rows[-1]["stream_ordering"] - if direction == 'b': - # Tokens are positions between events. - # This token points *after* the last event in the chunk. - # We need it to point to the event before it in the chunk - # when we are going backwards so we subtract one from the - # stream part. - toke -= 1 - next_token = str(RoomStreamToken(topo, toke)) - else: - # TODO (erikj): We should work out what to do here instead. - next_token = to_key if to_key else from_key - - return rows, next_token, - - rows, token = yield self.runInteraction("paginate_room_events", f) - - events = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(events, rows) - - defer.returnValue((events, token)) - @defer.inlineCallbacks def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None): rows, token = yield self.get_recent_event_ids_for_room( @@ -542,7 +411,7 @@ class StreamStore(SQLBaseStore): `room_id` causes it to return the current room specific topological token. """ - token = yield self._stream_id_gen.get_current_token() + token = yield self.get_room_max_stream_ordering() if room_id is None: defer.returnValue("s%d" % (token,)) else: @@ -552,11 +421,13 @@ class StreamStore(SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) + @abc.abstractmethod def get_room_max_stream_ordering(self): - return self._stream_id_gen.get_current_token() + raise NotImplementedError() + @abc.abstractmethod def get_room_min_stream_ordering(self): - return self._backfill_id_gen.get_current_token() + raise NotImplementedError() def get_stream_token_for_event(self, event_id): """The stream token for an event @@ -832,3 +703,168 @@ class StreamStore(SQLBaseStore): def has_room_changed_since(self, room_id, stream_id): return self._events_stream_cache.has_entity_changed(room_id, stream_id) + + +class StreamStore(StreamWorkerStore): + def get_room_max_stream_ordering(self): + return self._stream_id_gen.get_current_token() + + def get_room_min_stream_ordering(self): + return self._backfill_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_appservice_room_stream(self, service, from_key, to_key, limit=0): + # NB this lives here instead of appservice.py so we can reuse the + # 'private' StreamToken class in this file. 
+ if limit: + limit = max(limit, MAX_STREAM_SIZE) + else: + limit = MAX_STREAM_SIZE + + # From and to keys should be integers from ordering. + from_id = RoomStreamToken.parse_stream_token(from_key) + to_id = RoomStreamToken.parse_stream_token(to_key) + + if from_key == to_key: + defer.returnValue(([], to_key)) + return + + # select all the events between from/to with a sensible limit + sql = ( + "SELECT e.event_id, e.room_id, e.type, s.state_key, " + "e.stream_ordering FROM events AS e " + "LEFT JOIN state_events as s ON " + "e.event_id = s.event_id " + "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " + "ORDER BY stream_ordering ASC LIMIT %(limit)d " + ) % { + "limit": limit + } + + def f(txn): + # pull out all the events between the tokens + txn.execute(sql, (from_id.stream, to_id.stream,)) + rows = self.cursor_to_dict(txn) + + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is + # used for the Notifier listener rooms). We can't reasonably make a + # SQL query for these room IDs, so we'll pull all the events between + # from/to and filter in python. + rooms_for_as = self._get_app_service_rooms_txn(txn, service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + if service.is_interested_in_user(row.get("state_key")): + return True + return False + + return [r for r in rows if app_service_interested(r)] + + rows = yield self.runInteraction("get_appservice_room_stream", f) + + ret = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(ret, rows, topo_order=from_id is None) + + if rows: + key = "s%d" % max(r["stream_ordering"] for r in rows) + else: + # Assume we didn't get anything because there was nothing to + # get. + key = to_key + + defer.returnValue((ret, key)) + + @defer.inlineCallbacks + def paginate_room_events(self, room_id, from_key, to_key=None, + direction='b', limit=-1, event_filter=None): + # Tokens really represent positions between elements, but we use + # the convention of pointing to the event before the gap. Hence + # we have a bit of asymmetry when it comes to equalities. + args = [False, room_id] + if direction == 'b': + order = "DESC" + bounds = upper_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, lower_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + else: + order = "ASC" + bounds = lower_bound( + RoomStreamToken.parse(from_key), self.database_engine + ) + if to_key: + bounds = "%s AND %s" % (bounds, upper_bound( + RoomStreamToken.parse(to_key), self.database_engine + )) + + filter_clause, filter_args = filter_to_clause(event_filter) + + if filter_clause: + bounds += " AND " + filter_clause + args.extend(filter_args) + + if int(limit) > 0: + args.append(int(limit)) + limit_str = " LIMIT ?" + else: + limit_str = "" + + sql = ( + "SELECT * FROM events" + " WHERE outlier = ? AND room_id = ? 
AND %(bounds)s" + " ORDER BY topological_ordering %(order)s," + " stream_ordering %(order)s %(limit)s" + ) % { + "bounds": bounds, + "order": order, + "limit": limit_str + } + + def f(txn): + txn.execute(sql, args) + + rows = self.cursor_to_dict(txn) + + if rows: + topo = rows[-1]["topological_ordering"] + toke = rows[-1]["stream_ordering"] + if direction == 'b': + # Tokens are positions between events. + # This token points *after* the last event in the chunk. + # We need it to point to the event before it in the chunk + # when we are going backwards so we subtract one from the + # stream part. + toke -= 1 + next_token = str(RoomStreamToken(topo, toke)) + else: + # TODO (erikj): We should work out what to do here instead. + next_token = to_key if to_key else from_key + + return rows, next_token, + + rows, token = yield self.runInteraction("paginate_room_events", f) + + events = yield self._get_events( + [r["event_id"] for r in rows], + get_prev_content=True + ) + + self._set_before_and_after(events, rows) + + defer.returnValue((events, token)) From 22004b524e5264afb0e883bee486f669ea58833c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:59:40 +0000 Subject: [PATCH 258/348] Fix comment typo --- synapse/storage/event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index c08bebe112..848d8bd728 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -67,7 +67,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): def __init__(self, db_conn, hs): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) - # These get correctly ste by _find_stream_orderings_for_times_txn + # These get correctly set by _find_stream_orderings_for_times_txn self.stream_ordering_month_ago = 0 self.stream_ordering_day_ago = 0 From 872ff95ed49c9cb30ab5f256c5ff539430e658db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:00:05 +0000 Subject: [PATCH 259/348] Default stream_ordering_*_ago to None --- synapse/storage/event_push_actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 848d8bd728..7164293568 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -68,8 +68,8 @@ class EventPushActionsWorkerStore(SQLBaseStore): super(EventPushActionsWorkerStore, self).__init__(db_conn, hs) # These get correctly set by _find_stream_orderings_for_times_txn - self.stream_ordering_month_ago = 0 - self.stream_ordering_day_ago = 0 + self.stream_ordering_month_ago = None + self.stream_ordering_day_ago = None cur = LoggingTransaction( db_conn.cursor(), From 1b2af116502e6ebcd2ae24178754145e59ee7c24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:20:57 +0000 Subject: [PATCH 260/348] Document abstract class and method better --- synapse/storage/stream.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 057f30db33..a2527d2a36 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -148,6 +148,11 @@ def filter_to_clause(event_filter): class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): + """This is an abstract base class where subclasses must implement + `get_room_max_stream_ordering` and `get_room_min_stream_ordering` + which can be called in the 
initializer. + """ + __metaclass__ = abc.ABCMeta def __init__(self, db_conn, hs): @@ -170,6 +175,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self._stream_order_on_start = self.get_room_max_stream_ordering() + @abc.abstractmethod + def get_room_max_stream_ordering(self): + raise NotImplementedError() + + @abc.abstractmethod + def get_room_min_stream_ordering(self): + raise NotImplementedError() + @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): @@ -421,14 +434,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) defer.returnValue("t%d-%d" % (topo, token)) - @abc.abstractmethod - def get_room_max_stream_ordering(self): - raise NotImplementedError() - - @abc.abstractmethod - def get_room_min_stream_ordering(self): - raise NotImplementedError() - def get_stream_token_for_event(self, event_id): """The stream token for an event Args: From 884b26ae4150f19bd1e020c3eed934e978518a09 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:23:48 +0000 Subject: [PATCH 261/348] Remove unused variables --- synapse/storage/__init__.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 0ce76d7a8c..22c156c15b 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -141,14 +141,6 @@ class DataStore(RoomMemberStore, RoomStore, else: self._cache_id_gen = None - events_max = self._stream_id_gen.get_current_token() - event_cache_prefill, min_event_val = self._get_cache_dict( - db_conn, "events", - entity_column="room_id", - stream_column="stream_ordering", - max_value=events_max, - ) - self._presence_on_startup = self._get_active_presence(db_conn) presence_cache_prefill, min_presence_val = self._get_cache_dict( @@ -196,6 +188,7 @@ class DataStore(RoomMemberStore, RoomStore, "DeviceListFederationStreamChangeCache", device_list_max, ) + events_max = self._stream_id_gen.get_current_token() curr_state_delta_prefill, min_curr_state_delta_id = self._get_cache_dict( db_conn, "current_state_delta_stream", entity_column="room_id", From 7c371834ccbdf33f7070981bf23cbf11d1c1c333 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:40:27 +0000 Subject: [PATCH 262/348] Stub out broken function only used for cache --- synapse/storage/signatures.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/signatures.py b/synapse/storage/signatures.py index e6eeb1b641..9e6eaaa532 100644 --- a/synapse/storage/signatures.py +++ b/synapse/storage/signatures.py @@ -25,7 +25,9 @@ from synapse.util.caches.descriptors import cached, cachedList class SignatureWorkerStore(SQLBaseStore): @cached() def get_event_reference_hash(self, event_id): - return self._get_event_reference_hashes_txn(event_id) + # This is a dummy function to allow get_event_reference_hashes + # to use its cache + raise NotImplementedError() @cachedList(cached_method_name="get_event_reference_hash", list_name="event_ids", num_args=1) From 529c026ac149705d0c9948fed71e3f0ca069b759 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 16:49:12 +0000 Subject: [PATCH 263/348] Move back to hs.is_mine --- synapse/handlers/room_member.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 0329432f5c..7ecdf87246 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -43,12 
+43,11 @@ class RoomMemberHandler(object): # ought to be separated out a lot better. def __init__(self, hs): + self.hs = hs self.store = hs.get_datastore() self.auth = hs.get_auth() self.state_handler = hs.get_state_handler() self.config = hs.config - self.is_mine = hs.is_mine - self.is_mine_id = hs.is_mine_id self.simple_http_client = hs.get_simple_http_client() self.federation_handler = hs.get_handlers().federation_handler @@ -294,7 +293,7 @@ class RoomMemberHandler(object): if not is_host_in_room: inviter = yield self.get_inviter(target.to_string(), room_id) - if inviter and not self.is_mine(inviter): + if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) content["membership"] = Membership.JOIN @@ -319,7 +318,7 @@ class RoomMemberHandler(object): if not inviter: raise SynapseError(404, "Not a known room") - if self.is_mine(inviter): + if self.hs.is_mine(inviter): # the inviter was on our server, but has now left. Carry on # with the normal rejection codepath. # @@ -401,7 +400,7 @@ class RoomMemberHandler(object): "Sender (%s) must be same as requester (%s)" % (sender, requester.user) ) - assert self.is_mine(sender), "Sender must be our own: %s" % (sender,) + assert self.hs.is_mine(sender), "Sender must be our own: %s" % (sender,) else: requester = synapse.types.create_requester(target_user) @@ -801,10 +800,10 @@ class RoomMemberHandler(object): # first member event? create_event_id = current_state_ids.get(("m.room.create", "")) if len(current_state_ids) == 1 and create_event_id: - defer.returnValue(self.is_mine_id(create_event_id)) + defer.returnValue(self.hs.is_mine_id(create_event_id)) for etype, state_key in current_state_ids: - if etype != EventTypes.Member or not self.is_mine_id(state_key): + if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue event_id = current_state_ids[(etype, state_key)] From 33bebb63f34aa947a9a48920589cd63d27f1235c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:30:57 +0000 Subject: [PATCH 264/348] Add some caches to help read marker API --- synapse/replication/slave/storage/account_data.py | 1 + synapse/storage/account_data.py | 2 ++ synapse/storage/events.py | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 6c8d2954d7..970ac4c24d 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -56,6 +56,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved (row.data_type, row.user_id,) ) self.get_account_data_for_user.invalidate((row.user_id,)) + self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 466194e96f..c062e03d13 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -133,6 +133,7 @@ class AccountDataWorkerStore(SQLBaseStore): for row in rows }) + @cached(num_args=2) def get_account_data_for_room(self, user_id, room_id): """Get all the client account_data for a user for a room. 
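The @cached decorator added above memoises results per argument tuple, which is why the slave store's replication handler has to invalidate the same key when an account_data row arrives, and why the writing code can prefill the cache with a value it already has (the next patch does exactly that for the room-and-type lookup). A deliberately simplified stand-in for that machinery, not the real synapse.util.caches.descriptors implementation, is sketched below:

    class Cache(object):
        """Tiny keyed cache with the invalidate()/prefill() operations used above."""

        def __init__(self):
            self._entries = {}

        def get(self, key, compute):
            if key not in self._entries:
                self._entries[key] = compute()
            return self._entries[key]

        def invalidate(self, key):
            self._entries.pop(key, None)

        def prefill(self, key, value):
            self._entries[key] = value


    class ToyAccountDataStore(object):
        def __init__(self):
            self._db = {}
            self._room_cache = Cache()

        def get_account_data_for_room(self, user_id, room_id):
            return self._room_cache.get(
                (user_id, room_id),
                lambda: self._db.get((user_id, room_id)),
            )

        def add_account_data_to_room(self, user_id, room_id, content):
            self._db[(user_id, room_id)] = content
            # The writer knows the new value, so it can prefill straight away.
            self._room_cache.prefill((user_id, room_id), content)

        def on_replication_row(self, user_id, room_id):
            # A worker only learns that something changed, so it invalidates.
            self._room_cache.invalidate((user_id, room_id))

Prefilling saves the next reader a database hit; plain invalidation is the safe fallback when, as on a replication worker, only the fact of a change is known.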
@@ -310,6 +311,7 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) + self.get_account_data_for_room.invalidate((user_id, room_id,)) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index b63392a6cd..057b1be4d5 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -28,7 +28,7 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.types import get_domain_from_id from canonicaljson import encode_canonical_json @@ -2033,7 +2033,7 @@ class EventsStore(EventsWorkerStore): to_2, so_2 = yield self._get_event_ordering(event_id2) defer.returnValue((to_1, so_1) > (to_2, so_2)) - @defer.inlineCallbacks + @cachedInlineCallbacks(max_entries=5000) def _get_event_ordering(self, event_id): res = yield self._simple_select_one( table="events", From a83c514d1f8e1573246e0235c97ba140cbff12db Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:53:04 +0000 Subject: [PATCH 265/348] Improve caching for read_marker API We add a new storage function to get a paritcular type of room account data. This allows us to prefill the cache when updating that acount data. --- synapse/handlers/read_marker.py | 6 ++-- .../replication/slave/storage/account_data.py | 3 ++ synapse/storage/account_data.py | 35 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/read_marker.py b/synapse/handlers/read_marker.py index b5b0303d54..5142ae153d 100644 --- a/synapse/handlers/read_marker.py +++ b/synapse/handlers/read_marker.py @@ -41,9 +41,9 @@ class ReadMarkerHandler(BaseHandler): """ with (yield self.read_marker_linearizer.queue((room_id, user_id))): - account_data = yield self.store.get_account_data_for_room(user_id, room_id) - - existing_read_marker = account_data.get("m.fully_read", None) + existing_read_marker = yield self.store.get_account_data_for_room_and_type( + user_id, room_id, "m.fully_read", + ) should_update = True diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 970ac4c24d..355d14ff7c 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -57,6 +57,9 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved ) self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) + self.get_account_data_for_room_and_type.invalidate( + (row.user_id, row.room_id, row.account_data_type,), + ) self._account_data_stream_cache.entity_has_changed( row.user_id, token ) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index c062e03d13..077b4faa5f 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -157,6 +157,38 @@ class AccountDataWorkerStore(SQLBaseStore): "get_account_data_for_room", get_account_data_for_room_txn ) + @cached(num_args=3, max_entries=5000) + def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): + """Get all the client account_data for a user 
for a room. + + Args: + user_id(str): The user to get the account_data for. + room_id(str): The room to get the account_data for. + account_data_type (str): The account data type to get. + Returns: + A deferred dict of the room account_data for that type, or None if + there isn't any set. + """ + def get_account_data_for_room_and_type_txn(txn): + content_json = self._simple_select_one_onecol_txn( + txn, + table="room_account_data", + keyvalues={ + "user_id": user_id, + "room_id": room_id, + "account_data_type": account_data_type, + }, + retcol="content", + allow_none=True + ) + + return json.loads(content_json) if content_json else None + + return self.runInteraction( + "get_account_data_for_room_and_type", + get_account_data_for_room_and_type_txn, + ) + def get_all_updated_account_data(self, last_global_id, last_room_id, current_id, limit): """Get all the client account_data that has changed on the server @@ -312,6 +344,9 @@ class AccountDataStore(AccountDataWorkerStore): self._account_data_stream_cache.entity_has_changed(user_id, next_id) self.get_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id,)) + self.get_account_data_for_room_and_type.prefill( + (user_id, room_id, account_data_type,), content, + ) result = self._account_data_id_gen.get_current_token() defer.returnValue(result) From 4b44f05f1941478e2274b4894bfd025deedb9992 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:00:35 +0000 Subject: [PATCH 266/348] Fewer lies are better --- synapse/storage/account_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index 077b4faa5f..e70c9423e3 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -159,14 +159,14 @@ class AccountDataWorkerStore(SQLBaseStore): @cached(num_args=3, max_entries=5000) def get_account_data_for_room_and_type(self, user_id, room_id, account_data_type): - """Get all the client account_data for a user for a room. + """Get the client account_data of given type for a user for a room. Args: user_id(str): The user to get the account_data for. room_id(str): The room to get the account_data for. account_data_type (str): The account data type to get. Returns: - A deferred dict of the room account_data for that type, or None if + A deferred of the room account_data for that type, or None if there isn't any set. 
""" def get_account_data_for_room_and_type_txn(txn): From 65cf454fd1dda2def0920973e90de3ff305462d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:10:40 +0000 Subject: [PATCH 267/348] Remove unused DataStore --- synapse/replication/slave/storage/events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index f0d70a6734..b1f64ef0d8 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -16,7 +16,6 @@ import logging from synapse.api.constants import EventTypes -from synapse.storage import DataStore from synapse.storage.event_federation import EventFederationWorkerStore from synapse.storage.event_push_actions import EventPushActionsWorkerStore from synapse.storage.events_worker import EventsWorkerStore From 89b7232ff8ac7a7d3dad74b437d06678a6adca5e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:49:37 +0000 Subject: [PATCH 268/348] Fix typo in getting replication account data processing --- synapse/replication/slave/storage/account_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/slave/storage/account_data.py b/synapse/replication/slave/storage/account_data.py index 355d14ff7c..d9ba6d69b1 100644 --- a/synapse/replication/slave/storage/account_data.py +++ b/synapse/replication/slave/storage/account_data.py @@ -58,7 +58,7 @@ class SlavedAccountDataStore(TagsWorkerStore, AccountDataWorkerStore, BaseSlaved self.get_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id,)) self.get_account_data_for_room_and_type.invalidate( - (row.user_id, row.room_id, row.account_data_type,), + (row.user_id, row.room_id, row.data_type,), ) self._account_data_stream_cache.entity_has_changed( row.user_id, token From d960d23830cb8bffe49a1eafed21bf66c25e4235 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 11:03:18 +0000 Subject: [PATCH 269/348] Add missing yield during 3pid signature checks --- synapse/handlers/room_member.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7ecdf87246..ed3b97730d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - self.verify_any_signature(data, id_server) + yield self.verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: From fafa3e7114bdadd52f190892821ae61862ebfb2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 18:19:34 +0000 Subject: [PATCH 270/348] Split registration store --- .../replication/slave/storage/registration.py | 18 +-- synapse/storage/registration.py | 118 +++++++++--------- 2 files changed, 64 insertions(+), 72 deletions(-) diff --git a/synapse/replication/slave/storage/registration.py b/synapse/replication/slave/storage/registration.py index e27c7332d2..7323bf0f1e 100644 --- a/synapse/replication/slave/storage/registration.py +++ b/synapse/replication/slave/storage/registration.py @@ -14,20 +14,8 @@ # limitations under the License. 
from ._base import BaseSlavedStore -from synapse.storage import DataStore -from synapse.storage.registration import RegistrationStore +from synapse.storage.registration import RegistrationWorkerStore -class SlavedRegistrationStore(BaseSlavedStore): - def __init__(self, db_conn, hs): - super(SlavedRegistrationStore, self).__init__(db_conn, hs) - - # TODO: use the cached version and invalidate deleted tokens - get_user_by_access_token = RegistrationStore.__dict__[ - "get_user_by_access_token" - ] - - _query_for_auth = DataStore._query_for_auth.__func__ - get_user_by_id = RegistrationStore.__dict__[ - "get_user_by_id" - ] +class SlavedRegistrationStore(RegistrationWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 95f75d6df1..d809b2ba46 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -19,10 +19,70 @@ from twisted.internet import defer from synapse.api.errors import StoreError, Codes from synapse.storage import background_updates +from synapse.storage._base import SQLBaseStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -class RegistrationStore(background_updates.BackgroundUpdateStore): +class RegistrationWorkerStore(SQLBaseStore): + @cached() + def get_user_by_id(self, user_id): + return self._simple_select_one( + table="users", + keyvalues={ + "name": user_id, + }, + retcols=["name", "password_hash", "is_guest"], + allow_none=True, + desc="get_user_by_id", + ) + + @cached() + def get_user_by_access_token(self, token): + """Get a user from the given access token. + + Args: + token (str): The access token of a user. + Returns: + defer.Deferred: None, if the token did not match, otherwise dict + including the keys `name`, `is_guest`, `device_id`, `token_id`. + """ + return self.runInteraction( + "get_user_by_access_token", + self._query_for_auth, + token + ) + + @defer.inlineCallbacks + def is_server_admin(self, user): + res = yield self._simple_select_one_onecol( + table="users", + keyvalues={"name": user.to_string()}, + retcol="admin", + allow_none=True, + desc="is_server_admin", + ) + + defer.returnValue(res if res else False) + + def _query_for_auth(self, txn, token): + sql = ( + "SELECT users.name, users.is_guest, access_tokens.id as token_id," + " access_tokens.device_id" + " FROM users" + " INNER JOIN access_tokens on users.name = access_tokens.user_id" + " WHERE token = ?" + ) + + txn.execute(sql, (token,)) + rows = self.cursor_to_dict(txn) + if rows: + return rows[0] + + return None + + +class RegistrationStore(RegistrationWorkerStore, + background_updates.BackgroundUpdateStore): def __init__(self, db_conn, hs): super(RegistrationStore, self).__init__(db_conn, hs) @@ -187,18 +247,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): ) txn.call_after(self.is_guest.invalidate, (user_id,)) - @cached() - def get_user_by_id(self, user_id): - return self._simple_select_one( - table="users", - keyvalues={ - "name": user_id, - }, - retcols=["name", "password_hash", "is_guest"], - allow_none=True, - desc="get_user_by_id", - ) - def get_users_by_id_case_insensitive(self, user_id): """Gets users that match user_id case insensitively. Returns a mapping of user_id -> password_hash. @@ -304,34 +352,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): return self.runInteraction("delete_access_token", f) - @cached() - def get_user_by_access_token(self, token): - """Get a user from the given access token. 
- - Args: - token (str): The access token of a user. - Returns: - defer.Deferred: None, if the token did not match, otherwise dict - including the keys `name`, `is_guest`, `device_id`, `token_id`. - """ - return self.runInteraction( - "get_user_by_access_token", - self._query_for_auth, - token - ) - - @defer.inlineCallbacks - def is_server_admin(self, user): - res = yield self._simple_select_one_onecol( - table="users", - keyvalues={"name": user.to_string()}, - retcol="admin", - allow_none=True, - desc="is_server_admin", - ) - - defer.returnValue(res if res else False) - @cachedInlineCallbacks() def is_guest(self, user_id): res = yield self._simple_select_one_onecol( @@ -344,22 +364,6 @@ class RegistrationStore(background_updates.BackgroundUpdateStore): defer.returnValue(res if res else False) - def _query_for_auth(self, txn, token): - sql = ( - "SELECT users.name, users.is_guest, access_tokens.id as token_id," - " access_tokens.device_id" - " FROM users" - " INNER JOIN access_tokens on users.name = access_tokens.user_id" - " WHERE token = ?" - ) - - txn.execute(sql, (token,)) - rows = self.cursor_to_dict(txn) - if rows: - return rows[0] - - return None - @defer.inlineCallbacks def user_add_threepid(self, user_id, medium, address, validated_at, added_at): yield self._simple_upsert("user_threepids", { From efb79820b4924e13b2c7d1145cf891fd5d441c2a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Mar 2018 14:43:29 +0000 Subject: [PATCH 271/348] Fix bug with delayed cache invalidation stream We poked the notifier before updated the current token for the cache invalidation stream. This mean that sometimes the update wouldn't be sent until the next time a cache was invalidated. --- synapse/storage/_base.py | 26 ++++++++++++++------------ synapse/storage/event_push_actions.py | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 68125006eb..2fbebd4907 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -48,16 +48,16 @@ class LoggingTransaction(object): passed to the constructor. 
Adds logging and metrics to the .execute() method.""" __slots__ = [ - "txn", "name", "database_engine", "after_callbacks", "final_callbacks", + "txn", "name", "database_engine", "after_callbacks", "exception_callbacks", ] def __init__(self, txn, name, database_engine, after_callbacks, - final_callbacks): + exception_callbacks): object.__setattr__(self, "txn", txn) object.__setattr__(self, "name", name) object.__setattr__(self, "database_engine", database_engine) object.__setattr__(self, "after_callbacks", after_callbacks) - object.__setattr__(self, "final_callbacks", final_callbacks) + object.__setattr__(self, "exception_callbacks", exception_callbacks) def call_after(self, callback, *args, **kwargs): """Call the given callback on the main twisted thread after the @@ -66,8 +66,8 @@ class LoggingTransaction(object): """ self.after_callbacks.append((callback, args, kwargs)) - def call_finally(self, callback, *args, **kwargs): - self.final_callbacks.append((callback, args, kwargs)) + def call_on_exception(self, callback, *args, **kwargs): + self.exception_callbacks.append((callback, args, kwargs)) def __getattr__(self, name): return getattr(self.txn, name) @@ -215,7 +215,7 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) - def _new_transaction(self, conn, desc, after_callbacks, final_callbacks, + def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, logging_context, func, *args, **kwargs): start = time.time() * 1000 txn_id = self._TXN_ID @@ -236,7 +236,7 @@ class SQLBaseStore(object): txn = conn.cursor() txn = LoggingTransaction( txn, name, self.database_engine, after_callbacks, - final_callbacks, + exception_callbacks, ) r = func(txn, *args, **kwargs) conn.commit() @@ -308,11 +308,11 @@ class SQLBaseStore(object): current_context = LoggingContext.current_context() after_callbacks = [] - final_callbacks = [] + exception_callbacks = [] def inner_func(conn, *args, **kwargs): return self._new_transaction( - conn, desc, after_callbacks, final_callbacks, current_context, + conn, desc, after_callbacks, exception_callbacks, current_context, func, *args, **kwargs ) @@ -321,9 +321,10 @@ class SQLBaseStore(object): for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) - finally: - for after_callback, after_args, after_kwargs in final_callbacks: + except: # noqa: E722, as we reraise the exception this is fine. + for after_callback, after_args, after_kwargs in exception_callbacks: after_callback(*after_args, **after_kwargs) + raise defer.returnValue(result) @@ -1000,7 +1001,8 @@ class SQLBaseStore(object): # __exit__ called after the transaction finishes. 
ctx = self._cache_id_gen.get_next() stream_id = ctx.__enter__() - txn.call_finally(ctx.__exit__, None, None, None) + txn.call_on_exception(ctx.__exit__, None, None, None) + txn.call_after(ctx.__exit__, None, None, None) txn.call_after(self.hs.get_notifier().on_new_replication_data) self._simple_insert_txn( diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 7164293568..912e8db1d3 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -76,7 +76,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): name="_find_stream_orderings_for_times_txn", database_engine=self.database_engine, after_callbacks=[], - final_callbacks=[], + exception_callbacks=[], ) self._find_stream_orderings_for_times_txn(cur) cur.close() From 06a14876e5d78891a5b3ae6c0f454faefd696e79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:53:39 +0000 Subject: [PATCH 272/348] Add find_first_stream_ordering_after_ts Expose this as a public function which can be called outside a txn --- synapse/storage/event_push_actions.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 7164293568..54b54dcc6a 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -489,6 +489,27 @@ class EventPushActionsWorkerStore(SQLBaseStore): self.stream_ordering_day_ago ) + def find_first_stream_ordering_after_ts(self, ts): + """Gets the stream ordering corresponding to a given timestamp. + + Specifically, finds the stream_ordering of the first event that was + received after the timestamp. This is done by a binary search on the + events table, since there is no index on received_ts, so is + relatively slow. + + Args: + ts (int): timestamp in millis + + Returns: + Deferred[int]: stream ordering of the first event received after + the timestamp + """ + return self.runInteraction( + "_find_first_stream_ordering_after_ts_txn", + self._find_first_stream_ordering_after_ts_txn, + ts, + ) + def _find_first_stream_ordering_after_ts_txn(self, txn, ts): """ Find the stream_ordering of the first event that was received after From c818fcab1152439d79b03494816775b9ee3db613 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 11:47:48 +0000 Subject: [PATCH 273/348] Test and fix find_first_stream_ordering_after_ts It seemed to suffer from a bunch of off-by-one errors. --- synapse/storage/event_push_actions.py | 66 ++++++++++++++++++----- tests/storage/test_event_push_actions.py | 67 ++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 14 deletions(-) diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 54b54dcc6a..a8c303e11e 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -493,15 +493,15 @@ class EventPushActionsWorkerStore(SQLBaseStore): """Gets the stream ordering corresponding to a given timestamp. Specifically, finds the stream_ordering of the first event that was - received after the timestamp. This is done by a binary search on the - events table, since there is no index on received_ts, so is + received on or after the timestamp. This is done by a binary search on + the events table, since there is no index on received_ts, so is relatively slow. 
Args: ts (int): timestamp in millis Returns: - Deferred[int]: stream ordering of the first event received after + Deferred[int]: stream ordering of the first event received on/after the timestamp """ return self.runInteraction( @@ -510,16 +510,24 @@ class EventPushActionsWorkerStore(SQLBaseStore): ts, ) - def _find_first_stream_ordering_after_ts_txn(self, txn, ts): + @staticmethod + def _find_first_stream_ordering_after_ts_txn(txn, ts): """ - Find the stream_ordering of the first event that was received after - a given timestamp. This is relatively slow as there is no index on - received_ts but we can then use this to delete push actions before + Find the stream_ordering of the first event that was received on or + after a given timestamp. This is relatively slow as there is no index + on received_ts but we can then use this to delete push actions before this. received_ts must necessarily be in the same order as stream_ordering and stream_ordering is indexed, so we manually binary search using stream_ordering + + Args: + txn (twisted.enterprise.adbapi.Transaction): + ts (int): timestamp to search for + + Returns: + int: stream ordering """ txn.execute("SELECT MAX(stream_ordering) FROM events") max_stream_ordering = txn.fetchone()[0] @@ -527,23 +535,53 @@ class EventPushActionsWorkerStore(SQLBaseStore): if max_stream_ordering is None: return 0 + # We want the first stream_ordering in which received_ts is greater + # than or equal to ts. Call this point X. + # + # We maintain the invariants: + # + # range_start <= X <= range_end + # range_start = 0 - range_end = max_stream_ordering + range_end = max_stream_ordering + 1 + # Given a stream_ordering, look up the timestamp at that + # stream_ordering. + # + # The array may be sparse (we may be missing some stream_orderings). + # We treat the gaps as the same as having the same value as the + # preceding entry, because we will pick the lowest stream_ordering + # which satisfies our requirement of received_ts >= ts. + # + # For example, if our array of events indexed by stream_ordering is + # [10, , 20], we should treat this as being equivalent to + # [10, 10, 20]. + # sql = ( "SELECT received_ts FROM events" - " WHERE stream_ordering > ?" - " ORDER BY stream_ordering" + " WHERE stream_ordering <= ?" + " ORDER BY stream_ordering DESC" " LIMIT 1" ) - while range_end - range_start > 1: - middle = int((range_end + range_start) / 2) + while range_end - range_start > 0: + middle = (range_end + range_start) // 2 txn.execute(sql, (middle,)) - middle_ts = txn.fetchone()[0] + row = txn.fetchone() + if row is None: + # no rows with stream_ordering<=middle + range_start = middle + 1 + continue + + middle_ts = row[0] if ts > middle_ts: - range_start = middle + # we got a timestamp lower than the one we were looking for. + # definitely need to look higher: X > middle. + range_start = middle + 1 else: + # we got a timestamp higher than (or the same as) the one we + # were looking for. We aren't yet sure about the point we + # looked up, but we can be sure that X <= middle. 
range_end = middle return range_end diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 6c1aad149b..dbaaa12e23 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -127,3 +127,70 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): yield _assert_counts(1, 1) yield _rotate(10) yield _assert_counts(1, 1) + + @tests.unittest.DEBUG + @defer.inlineCallbacks + def test_find_first_stream_ordering_after_ts(self): + def add_event(so, ts): + return self.store._simple_insert("events", { + "stream_ordering": so, + "received_ts": ts, + "event_id": "event%i" % so, + "type": "", + "room_id": "", + "content": "", + "processed": True, + "outlier": False, + "topological_ordering": 0, + "depth": 0, + }) + + # start with the base case where there are no events in the table + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 0) + + # now with one event + yield add_event(2, 10) + r = yield self.store.find_first_stream_ordering_after_ts(9) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(10) + self.assertEqual(r, 2) + r = yield self.store.find_first_stream_ordering_after_ts(11) + self.assertEqual(r, 3) + + # add a bunch of dummy events to the events table + for (stream_ordering, ts) in ( + (3, 110), + (4, 120), + (5, 120), + (10, 130), + (20, 140), + ): + yield add_event(stream_ordering, ts) + + r = yield self.store.find_first_stream_ordering_after_ts(110) + self.assertEqual(r, 3, + "First event after 110ms should be 3, was %i" % r) + + # 4 and 5 are both after 12: we want 4 rather than 5 + r = yield self.store.find_first_stream_ordering_after_ts(120) + self.assertEqual(r, 4, + "First event after 120ms should be 4, was %i" % r) + + r = yield self.store.find_first_stream_ordering_after_ts(129) + self.assertEqual(r, 10, + "First event after 129ms should be 10, was %i" % r) + + # check we can get the last event + r = yield self.store.find_first_stream_ordering_after_ts(140) + self.assertEqual(r, 20, + "First event after 14ms should be 20, was %i" % r) + + # off the end + r = yield self.store.find_first_stream_ordering_after_ts(160) + self.assertEqual(r, 21) + + # check we can find an event at ordering zero + yield add_event(0, 5) + r = yield self.store.find_first_stream_ordering_after_ts(1) + self.assertEqual(r, 0) From 2c911d75e820e321c3e9c885f74ffeabd666b60f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 5 Mar 2018 12:24:49 +0000 Subject: [PATCH 274/348] Fix comment typo --- tests/storage/test_event_push_actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index dbaaa12e23..575374c6a6 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -172,7 +172,7 @@ class EventPushActionsStoreTestCase(tests.unittest.TestCase): self.assertEqual(r, 3, "First event after 110ms should be 3, was %i" % r) - # 4 and 5 are both after 12: we want 4 rather than 5 + # 4 and 5 are both after 120: we want 4 rather than 5 r = yield self.store.find_first_stream_ordering_after_ts(120) self.assertEqual(r, 4, "First event after 120ms should be 4, was %i" % r) From f8bfcd7e0d2fc6399eb654a41773cd603b4037fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 1 Mar 2018 23:20:54 +0000 Subject: [PATCH 275/348] Provide a means to pass a timestamp to purge_history --- 
docs/admin_api/purge_history_api.rst | 11 +++++- synapse/handlers/message.py | 14 +++---- synapse/rest/client/v1/admin.py | 58 ++++++++++++++++++++++++++-- synapse/storage/stream.py | 27 +++++++++++++ 4 files changed, 96 insertions(+), 14 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index a3a17e9f9f..acf1bc5749 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -8,9 +8,9 @@ Depending on the amount of history being purged a call to the API may take several minutes or longer. During this period users will not be able to paginate further back in the room from the point being purged from. -The API is simply: +The API is: -``POST /_matrix/client/r0/admin/purge_history//`` +``POST /_matrix/client/r0/admin/purge_history/[/]`` including an ``access_token`` of a server admin. @@ -25,3 +25,10 @@ To delete local events as well, set ``delete_local_events`` in the body: { "delete_local_events": true } + +The caller must specify the point in the room to purge up to. This can be +specified by including an event_id in the URI, or by setting a +``purge_up_to_event_id`` or ``purge_up_to_ts`` in the request body. If an event +id is given, that event (and others at the same graph depth) will be retained. +If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, +in milliseconds. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7d28c2745c..dd00d8a86c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -52,16 +52,12 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() @defer.inlineCallbacks - def purge_history(self, room_id, event_id, delete_local_events=False): - event = yield self.store.get_event(event_id) - - if event.room_id != room_id: - raise SynapseError(400, "Event is for wrong room.") - - depth = event.depth - + def purge_history(self, room_id, topological_ordering, + delete_local_events=False): with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, depth, delete_local_events) + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 3917eee42d..dcf6215dad 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError +from synapse.api.errors import AuthError, SynapseError, Codes from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -114,12 +114,18 @@ class PurgeMediaCacheRestServlet(ClientV1RestServlet): class PurgeHistoryRestServlet(ClientV1RestServlet): PATTERNS = client_path_patterns( - "/admin/purge_history/(?P[^/]*)/(?P[^/]*)" + "/admin/purge_history/(?P[^/]*)(/(?P[^/]+))?" 
) def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ super(PurgeHistoryRestServlet, self).__init__(hs) self.handlers = hs.get_handlers() + self.store = hs.get_datastore() @defer.inlineCallbacks def on_POST(self, request, room_id, event_id): @@ -133,8 +139,54 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): delete_local_events = bool(body.get("delete_local_events", False)) + # establish the topological ordering we should keep events from. The + # user can provide an event_id in the URL or the request body, or can + # provide a timestamp in the request body. + if event_id is None: + event_id = body.get('purge_up_to_event_id') + + if event_id is not None: + event = yield self.store.get_event(event_id) + + if event.room_id != room_id: + raise SynapseError(400, "Event is for wrong room.") + + depth = event.depth + logger.info( + "[purge] purging up to depth %i (event_id %s)", + depth, event_id, + ) + elif 'purge_up_to_ts' in body: + ts = body['purge_up_to_ts'] + if not isinstance(ts, int): + raise SynapseError( + 400, "purge_up_to_ts must be an int", + errcode=Codes.BAD_JSON, + ) + + stream_ordering = ( + yield self.store.find_first_stream_ordering_after_ts(ts) + ) + + (_, depth, _) = ( + yield self.store.get_room_event_after_stream_ordering( + room_id, stream_ordering, + ) + ) + logger.info( + "[purge] purging up to depth %i (received_ts %i => " + "stream_ordering %i)", + depth, ts, stream_ordering, + ) + else: + raise SynapseError( + 400, + "must specify purge_up_to_event_id or purge_up_to_ts", + errcode=Codes.BAD_JSON, + ) + yield self.handlers.message_handler.purge_history( - room_id, event_id, + room_id, depth, delete_local_events=delete_local_events, ) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a36..515a04699a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -416,6 +416,33 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "get_recent_events_for_room", get_recent_events_for_room_txn ) + def get_room_event_after_stream_ordering(self, room_id, stream_ordering): + """Gets details of the first event in a room at or after a stream ordering + + Args: + room_id (str): + stream_ordering (int): + + Returns: + Deferred[(int, int, str)]: + (stream ordering, topological ordering, event_id) + """ + def _f(txn): + sql = ( + "SELECT stream_ordering, topological_ordering, event_id" + " FROM events" + " WHERE room_id = ? AND stream_ordering >= ?" + " AND NOT outlier" + " ORDER BY stream_ordering" + " LIMIT 1" + ) + txn.execute(sql, (room_id, stream_ordering, )) + return txn.fetchone() + + return self.runInteraction( + "get_room_event_after_stream_ordering", _f, + ) + @defer.inlineCallbacks def get_room_events_max_id(self, room_id=None): """Returns the current token for rooms stream. From 2e223163ff59fa7d6030654dd8c74e58f8aa1deb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:11:30 +0000 Subject: [PATCH 276/348] Split Directory store --- .../replication/slave/storage/directory.py | 8 ++-- synapse/storage/directory.py | 45 ++++++++++--------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/synapse/replication/slave/storage/directory.py b/synapse/replication/slave/storage/directory.py index 7301d885f2..6deecd3963 100644 --- a/synapse/replication/slave/storage/directory.py +++ b/synapse/replication/slave/storage/directory.py @@ -14,10 +14,8 @@ # limitations under the License. 
from ._base import BaseSlavedStore -from synapse.storage.directory import DirectoryStore +from synapse.storage.directory import DirectoryWorkerStore -class DirectoryStore(BaseSlavedStore): - get_aliases_for_room = DirectoryStore.__dict__[ - "get_aliases_for_room" - ] +class DirectoryStore(DirectoryWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 79e7c540ad..041b0b457e 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -29,8 +29,7 @@ RoomAliasMapping = namedtuple( ) -class DirectoryStore(SQLBaseStore): - +class DirectoryWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_association_from_room_alias(self, room_alias): """ Get's the room_id and server list for a given room_alias @@ -69,6 +68,28 @@ class DirectoryStore(SQLBaseStore): RoomAliasMapping(room_id, room_alias.to_string(), servers) ) + def get_room_alias_creator(self, room_alias): + return self._simple_select_one_onecol( + table="room_aliases", + keyvalues={ + "room_alias": room_alias, + }, + retcol="creator", + desc="get_room_alias_creator", + allow_none=True + ) + + @cached(max_entries=5000) + def get_aliases_for_room(self, room_id): + return self._simple_select_onecol( + "room_aliases", + {"room_id": room_id}, + "room_alias", + desc="get_aliases_for_room", + ) + + +class DirectoryStore(DirectoryWorkerStore): @defer.inlineCallbacks def create_room_alias_association(self, room_alias, room_id, servers, creator=None): """ Creates an associatin between a room alias and room_id/servers @@ -116,17 +137,6 @@ class DirectoryStore(SQLBaseStore): ) defer.returnValue(ret) - def get_room_alias_creator(self, room_alias): - return self._simple_select_one_onecol( - table="room_aliases", - keyvalues={ - "room_alias": room_alias, - }, - retcol="creator", - desc="get_room_alias_creator", - allow_none=True - ) - @defer.inlineCallbacks def delete_room_alias(self, room_alias): room_id = yield self.runInteraction( @@ -162,15 +172,6 @@ class DirectoryStore(SQLBaseStore): return room_id - @cached(max_entries=5000) - def get_aliases_for_room(self, room_id): - return self._simple_select_onecol( - "room_aliases", - {"room_id": room_id}, - "room_alias", - desc="get_aliases_for_room", - ) - def update_aliases_for_room(self, old_room_id, new_room_id, creator): def _update_aliases_for_room_txn(txn): sql = "UPDATE room_aliases SET room_id = ?, creator = ? WHERE room_id = ?" 
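The split above follows the worker-store pattern used throughout this series: read-only accessors move into a ``*WorkerStore`` base class that both the main store and the replication slaves inherit, while the write paths stay on the main-process subclass. A minimal, self-contained sketch of that shape (the class names and the dict-backed storage are illustrative only, not Synapse's real API):

.. code:: python

    class ExampleWorkerStore(object):
        """Read-only accessors, safe to run on any worker process."""

        def __init__(self):
            self._rows = {}

        def get_thing(self, thing_id):
            # Pure read: workers can serve this without ever writing.
            return self._rows.get(thing_id)


    class ExampleStore(ExampleWorkerStore):
        """Main-process store: inherits the reads and adds the writes."""

        def create_thing(self, thing_id, value):
            # Writes stay on the main process, which is also where cache
            # invalidations get streamed out to the workers from.
            self._rows[thing_id] = value


    class SlavedExampleStore(ExampleWorkerStore):
        """A replication slave reuses the read methods unchanged."""
        pass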
From 69ce365b793ac35c8e9ee6c3b3a0d04d0e61db4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:12:22 +0000 Subject: [PATCH 277/348] Fix cache invalidation on deletion --- synapse/storage/directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 041b0b457e..d0c0059757 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -145,7 +145,6 @@ class DirectoryStore(DirectoryWorkerStore): room_alias, ) - self.get_aliases_for_room.invalidate((room_id,)) defer.returnValue(room_id) def _delete_room_alias_txn(self, txn, room_alias): @@ -170,6 +169,10 @@ class DirectoryStore(DirectoryWorkerStore): (room_alias.to_string(),) ) + self._invalidate_cache_and_stream( + txn, self.get_aliases_for_room, (room_id,) + ) + return room_id def update_aliases_for_room(self, old_room_id, new_room_id, creator): From d4ffe61d4fb71953bff0f94ff5d1603afe7d46f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 15:42:57 +0000 Subject: [PATCH 278/348] Remove ability for AS users to call /events and /sync This functionality has been deprecated for a while as well as being broken for a while. Instead of fixing it lets just remove it entirely. See: https://github.com/matrix-org/matrix-doc/issues/1144 --- synapse/handlers/room.py | 9 ++-- synapse/handlers/sync.py | 10 +++-- synapse/storage/appservice.py | 82 +++-------------------------------- synapse/storage/stream.py | 76 -------------------------------- 4 files changed, 14 insertions(+), 163 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6c425828c1..8df8fcbbad 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -475,12 +475,9 @@ class RoomEventSource(object): user.to_string() ) if app_service: - events, end_key = yield self.store.get_appservice_room_stream( - service=app_service, - from_key=from_key, - to_key=to_key, - limit=limit, - ) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: room_events = yield self.store.get_membership_changes_for_user( user.to_string(), from_key, to_key diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b12988f3c9..56b86356f2 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -998,8 +998,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) @@ -1030,8 +1031,9 @@ class SyncHandler(object): app_service = self.store.get_app_service_by_user_id(user_id) if app_service: - rooms = yield self.store.get_app_service_rooms(app_service) - joined_room_ids = set(r.room_id for r in rooms) + # We no longer support AS users using /sync directly. 
+ # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() else: joined_room_ids = yield self.store.get_rooms_for_user(user_id) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 90fb51d43c..12ea8a158c 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -18,11 +18,9 @@ import re import simplejson as json from twisted.internet import defer -from synapse.api.constants import Membership from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices from synapse.storage.events import EventsWorkerStore -from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -115,81 +113,11 @@ class ApplicationServiceWorkerStore(SQLBaseStore): class ApplicationServiceStore(ApplicationServiceWorkerStore): - - def __init__(self, db_conn, hs): - super(ApplicationServiceStore, self).__init__(db_conn, hs) - self.hostname = hs.hostname - - def get_app_service_rooms(self, service): - """Get a list of RoomsForUser for this application service. - - Application services may be "interested" in lots of rooms depending on - the room ID, the room aliases, or the members in the room. This function - takes all of these into account and returns a list of RoomsForUser which - represent the entire list of room IDs that this application service - wants to know about. - - Args: - service: The application service to get a room list for. - Returns: - A list of RoomsForUser. - """ - return self.runInteraction( - "get_app_service_rooms", - self._get_app_service_rooms_txn, - service, - ) - - def _get_app_service_rooms_txn(self, txn, service): - # get all rooms matching the room ID regex. - room_entries = self._simple_select_list_txn( - txn=txn, table="rooms", keyvalues=None, retcols=["room_id"] - ) - matching_room_list = set([ - r["room_id"] for r in room_entries if - service.is_interested_in_room(r["room_id"]) - ]) - - # resolve room IDs for matching room alias regex. - room_alias_mappings = self._simple_select_list_txn( - txn=txn, table="room_aliases", keyvalues=None, - retcols=["room_id", "room_alias"] - ) - matching_room_list |= set([ - r["room_id"] for r in room_alias_mappings if - service.is_interested_in_alias(r["room_alias"]) - ]) - - # get all rooms for every user for this AS. This is scoped to users on - # this HS only. - user_list = self._simple_select_list_txn( - txn=txn, table="users", keyvalues=None, retcols=["name"] - ) - user_list = [ - u["name"] for u in user_list if - service.is_interested_in_user(u["name"]) - ] - rooms_for_user_matching_user_id = set() # RoomsForUser list - for user_id in user_list: - # FIXME: This assumes this store is linked with RoomMemberStore :( - rooms_for_user = self._get_rooms_for_user_where_membership_is_txn( - txn=txn, - user_id=user_id, - membership_list=[Membership.JOIN] - ) - rooms_for_user_matching_user_id |= set(rooms_for_user) - - # make RoomsForUser tuples for room ids and aliases which are not in the - # main rooms_for_user_list - e.g. they are rooms which do not have AS - # registered users in it. - known_room_ids = [r.room_id for r in rooms_for_user_matching_user_id] - missing_rooms_for_user = [ - RoomsForUser(r, service.sender, "join") for r in - matching_room_list if r not in known_room_ids - ] - rooms_for_user_matching_user_id |= set(missing_rooms_for_user) - - return rooms_for_user_matching_user_id + # This is currently empty due to there not being any AS storage functions + # that can't be run on the workers. 
Since this may change in future, and + # to keep consistency with the other stores, we keep this empty class for + # now. + pass class ApplicationServiceTransactionWorkerStore(ApplicationServiceWorkerStore, diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index a2527d2a36..b78151cd82 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -39,7 +39,6 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.events import EventsWorkerStore from synapse.util.caches.descriptors import cached -from synapse.api.constants import EventTypes from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn @@ -717,81 +716,6 @@ class StreamStore(StreamWorkerStore): def get_room_min_stream_ordering(self): return self._backfill_id_gen.get_current_token() - @defer.inlineCallbacks - def get_appservice_room_stream(self, service, from_key, to_key, limit=0): - # NB this lives here instead of appservice.py so we can reuse the - # 'private' StreamToken class in this file. - if limit: - limit = max(limit, MAX_STREAM_SIZE) - else: - limit = MAX_STREAM_SIZE - - # From and to keys should be integers from ordering. - from_id = RoomStreamToken.parse_stream_token(from_key) - to_id = RoomStreamToken.parse_stream_token(to_key) - - if from_key == to_key: - defer.returnValue(([], to_key)) - return - - # select all the events between from/to with a sensible limit - sql = ( - "SELECT e.event_id, e.room_id, e.type, s.state_key, " - "e.stream_ordering FROM events AS e " - "LEFT JOIN state_events as s ON " - "e.event_id = s.event_id " - "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " - "ORDER BY stream_ordering ASC LIMIT %(limit)d " - ) % { - "limit": limit - } - - def f(txn): - # pull out all the events between the tokens - txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) - - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is - # used for the Notifier listener rooms). We can't reasonably make a - # SQL query for these room IDs, so we'll pull all the events between - # from/to and filter in python. - rooms_for_as = self._get_app_service_rooms_txn(txn, service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - - return [r for r in rows if app_service_interested(r)] - - rows = yield self.runInteraction("get_appservice_room_stream", f) - - ret = yield self._get_events( - [r["event_id"] for r in rows], - get_prev_content=True - ) - - self._set_before_and_after(ret, rows, topo_order=from_id is None) - - if rows: - key = "s%d" % max(r["stream_ordering"] for r in rows) - else: - # Assume we didn't get anything because there was nothing to - # get. 
- key = to_key - - defer.returnValue((ret, key)) - @defer.inlineCallbacks def paginate_room_events(self, room_id, from_key, to_key=None, direction='b', limit=-1, event_filter=None): From ed9b5eced4f17dfb0a92167a6281e13054821d6f Mon Sep 17 00:00:00 2001 From: Krombel Date: Mon, 5 Mar 2018 17:51:09 +0100 Subject: [PATCH 279/348] use bcrypt.checkpw in bcrypt 3.1.0 checkpw got introduced (already 2 years ago) This makes use of that with enhancements which might get introduced by that Signed-Off-by: Matthias Kesler --- synapse/handlers/auth.py | 6 ++++-- synapse/python_dependencies.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 258cc345dc..a5365c4fe4 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -863,8 +863,10 @@ class AuthHandler(BaseHandler): """ def _do_validate_hash(): - return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, - stored_hash.encode('utf8')) == stored_hash + return bcrypt.checkpw( + password.encode('utf8') + self.hs.config.password_pepper, + stored_hash.encode('utf8') + ) if stored_hash: return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 5d65b5fd6e..91179ce532 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -31,7 +31,7 @@ REQUIREMENTS = { "pyyaml": ["yaml"], "pyasn1": ["pyasn1"], "daemonize": ["daemonize"], - "bcrypt": ["bcrypt"], + "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], "ujson": ["ujson"], From 8cb44da4aa569188faa2a94aae6bc093aa8e22ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 12:06:19 +0000 Subject: [PATCH 280/348] Fix race in sync when joining room The race happens when the user joins a room at the same time as doing a sync. We fetch the current token and then get the rooms the user is in. If the join happens after the current token, but before we get the rooms we end up sending down a partial room entry in the sync. This is fixed by looking at the stream ordering of the membership returned by get_rooms_for_user, and handling the case when that stream ordering is after the current token. --- synapse/handlers/sync.py | 103 +++++++++++++++++++++++++--------- synapse/storage/events.py | 2 +- synapse/storage/roommember.py | 27 ++++++++- 3 files changed, 102 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 56b86356f2..163d80417e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -235,10 +235,10 @@ class SyncHandler(object): defer.returnValue(rules) @defer.inlineCallbacks - def ephemeral_by_room(self, sync_config, now_token, since_token=None): + def ephemeral_by_room(self, sync_result_builder, now_token, since_token=None): """Get the ephemeral events for each room the user is in Args: - sync_config (SyncConfig): The flags, filters and user for the sync. + sync_result_builder(SyncResultBuilder) now_token (StreamToken): Where the server is currently up to. since_token (StreamToken): Where the server was when the client last synced. @@ -248,10 +248,12 @@ class SyncHandler(object): typing events for that room. 
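Tying back to the bcrypt change in patch 279 above: ``bcrypt.checkpw`` compares a candidate password against a stored hash, so the peppered validation reduces to a sketch like the one below (the pepper and password values are placeholders; requires the ``bcrypt`` package, 3.1.0 or later):

.. code:: python

    import bcrypt

    pepper = b"not-a-real-pepper"

    # At registration time: hash the peppered password.
    stored_hash = bcrypt.hashpw(
        "s3kr1t".encode("utf8") + pepper, bcrypt.gensalt(),
    )

    # At login time: checkpw re-hashes using the salt embedded in
    # stored_hash and does the comparison for us.
    assert bcrypt.checkpw("s3kr1t".encode("utf8") + pepper, stored_hash)
    assert not bcrypt.checkpw("wrong".encode("utf8") + pepper, stored_hash)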
""" + sync_config = sync_result_builder.sync_config + with Measure(self.clock, "ephemeral_by_room"): typing_key = since_token.typing_key if since_token else "0" - room_ids = yield self.store.get_rooms_for_user(sync_config.user.to_string()) + room_ids = sync_result_builder.joined_room_ids typing_source = self.event_sources.sources["typing"] typing, typing_key = yield typing_source.get_new_events( @@ -565,10 +567,22 @@ class SyncHandler(object): # Always use the `now_token` in `SyncResultBuilder` now_token = yield self.event_sources.get_current_token() + user_id = sync_config.user.to_string() + app_service = self.store.get_app_service_by_user_id(user_id) + if app_service: + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() + else: + joined_room_ids = yield self.get_rooms_for_user_at( + user_id, now_token.room_stream_id, + ) + sync_result_builder = SyncResultBuilder( sync_config, full_state, since_token=since_token, now_token=now_token, + joined_room_ids=joined_room_ids, ) account_data_by_room = yield self._generate_sync_entry_for_account_data( @@ -603,7 +617,6 @@ class SyncHandler(object): device_id = sync_config.device_id one_time_key_counts = {} if device_id: - user_id = sync_config.user.to_string() one_time_key_counts = yield self.store.count_e2e_one_time_keys( user_id, device_id ) @@ -891,7 +904,7 @@ class SyncHandler(object): ephemeral_by_room = {} else: now_token, ephemeral_by_room = yield self.ephemeral_by_room( - sync_result_builder.sync_config, + sync_result_builder, now_token=sync_result_builder.now_token, since_token=sync_result_builder.since_token, ) @@ -996,16 +1009,8 @@ class SyncHandler(object): if rooms_changed: defer.returnValue(True) - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - stream_id = RoomStreamToken.parse_stream_token(since_token.room_key).stream - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: if self.store.has_room_changed_since(room_id, stream_id): defer.returnValue(True) defer.returnValue(False) @@ -1029,14 +1034,6 @@ class SyncHandler(object): assert since_token - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = yield self.store.get_rooms_for_user(user_id) - # Get a list of membership change events that have happened. rooms_changed = yield self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key @@ -1059,7 +1056,7 @@ class SyncHandler(object): # we do send down the room, and with full state, where necessary old_state_ids = None - if room_id in joined_room_ids and non_joins: + if room_id in sync_result_builder.joined_room_ids and non_joins: # Always include if the user (re)joined the room, especially # important so that device list changes are calculated correctly. 
# If there are non join member events, but we are still in the room, @@ -1069,7 +1066,7 @@ class SyncHandler(object): # User is in the room so we don't need to do the invite/leave checks continue - if room_id in joined_room_ids or has_join: + if room_id in sync_result_builder.joined_room_ids or has_join: old_state_ids = yield self.get_state_at(room_id, since_token) old_mem_ev_id = old_state_ids.get((EventTypes.Member, user_id), None) old_mem_ev = None @@ -1081,7 +1078,7 @@ class SyncHandler(object): newly_joined_rooms.append(room_id) # If user is in the room then we don't need to do the invite/leave checks - if room_id in joined_room_ids: + if room_id in sync_result_builder.joined_room_ids: continue if not non_joins: @@ -1148,7 +1145,7 @@ class SyncHandler(object): # Get all events for rooms we're currently joined to. room_to_events = yield self.store.get_room_events_stream_for_rooms( - room_ids=joined_room_ids, + room_ids=sync_result_builder.joined_room_ids, from_key=since_token.room_key, to_key=now_token.room_key, limit=timeline_limit + 1, @@ -1156,7 +1153,7 @@ class SyncHandler(object): # We loop through all room ids, even if there are no new events, in case # there are non room events taht we need to notify about. - for room_id in joined_room_ids: + for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) if room_entry: @@ -1364,6 +1361,54 @@ class SyncHandler(object): else: raise Exception("Unrecognized rtype: %r", room_builder.rtype) + @defer.inlineCallbacks + def get_rooms_for_user_at(self, user_id, stream_ordering): + """Get set of joined rooms for a user at the given stream ordering. + + The stream ordering *must* be recent, otherwise this may throw an + exception if older than a month. (This function is called with the + current token, which should be perfectly fine). + + Args: + user_id (str) + stream_ordering (int) + + ReturnValue: + Deferred[frozenset[str]]: Set of room_ids the user is in at given + stream_ordering. + """ + joined_rooms = yield self.store.get_rooms_for_user_with_stream_ordering( + user_id, + ) + + joined_room_ids = set() + + # We need to check that the stream ordering of the join for each room + # is before the stream_ordering asked for. This might not be the case + # if the user joins a room between us getting the current token and + # calling `get_rooms_for_user_with_stream_ordering`. + # If the membership's stream ordering is after the given stream + # ordering, we need to go and work out if the user was in the room + # before. 
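The loop that follows is easier to see in isolation; here is a self-contained sketch of the same filtering decision, with plain tuples standing in for the real storage rows:

.. code:: python

    def split_joined_rooms(joined_rooms, stream_ordering):
        """Partition (room_id, membership_stream_ordering) pairs.

        Joins at or before the sync token's stream ordering can be used
        directly; joins after it raced with the token and need the slower
        "was the user already in the room?" check against the forward
        extremities.
        """
        settled = set()
        needs_recheck = set()
        for room_id, membership_stream_ordering in joined_rooms:
            if membership_stream_ordering <= stream_ordering:
                settled.add(room_id)
            else:
                needs_recheck.add(room_id)
        return settled, needs_recheck


    # The join to "!b:hs" happened after the sync token at ordering 100.
    print(split_joined_rooms([("!a:hs", 90), ("!b:hs", 105)], 100))
    # settled = {"!a:hs"}, needs_recheck = {"!b:hs"}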
+ for room_id, membeship_stream_ordering in joined_rooms: + if membeship_stream_ordering <= stream_ordering: + joined_room_ids.add(room_id) + continue + + logger.info("SH joined_room_ids membership after current token") + + extrems = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering, + ) + users_in_room = yield self.state.get_current_user_in_room( + room_id, extrems, + ) + if user_id in users_in_room: + joined_room_ids.add(room_id) + + joined_room_ids = frozenset(joined_room_ids) + defer.returnValue(joined_room_ids) + def _action_has_highlight(actions): for action in actions: @@ -1413,7 +1458,8 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): class SyncResultBuilder(object): "Used to help build up a new SyncResult for a user" - def __init__(self, sync_config, full_state, since_token, now_token): + def __init__(self, sync_config, full_state, since_token, now_token, + joined_room_ids): """ Args: sync_config(SyncConfig) @@ -1425,6 +1471,7 @@ class SyncResultBuilder(object): self.full_state = full_state self.since_token = since_token self.now_token = now_token + self.joined_room_ids = joined_room_ids self.presence = [] self.account_data = [] diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 057b1be4d5..826fad307e 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -754,7 +754,7 @@ class EventsStore(EventsWorkerStore): for member in members_changed: self._invalidate_cache_and_stream( - txn, self.get_rooms_for_user, (member,) + txn, self.get_rooms_for_user_with_stream_ordering, (member,) ) for host in set(get_domain_from_id(u) for u in members_changed): diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index d79877dac7..52e19e16b0 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -38,6 +38,11 @@ RoomsForUser = namedtuple( ("room_id", "sender", "membership", "event_id", "stream_ordering") ) +GetRoomsForUserWithStreamOrdering = namedtuple( + "_GetRoomsForUserWithStreamOrdering", + ("room_id", "stream_ordering",) +) + # We store this using a namedtuple so that we save about 3x space over using a # dict. @@ -181,12 +186,32 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_rooms_for_user(self, user_id): + def get_rooms_for_user_with_stream_ordering(self, user_id): """Returns a set of room_ids the user is currently joined to + + Args: + user_id (str) + + Returns: + Deferred[frozenset[GetRoomsForUserWithStreamOrdering]]: Returns + the rooms the user is in currently, along with the stream ordering + of the most recent join for that user and room. 
""" rooms = yield self.get_rooms_for_user_where_membership_is( user_id, membership_list=[Membership.JOIN], ) + defer.returnValue(frozenset( + GetRoomsForUserWithStreamOrdering(r.room_id, r.stream_ordering) + for r in rooms + )) + + @defer.inlineCallbacks + def get_rooms_for_user(self, user_id, on_invalidate=None): + """Returns a set of room_ids the user is currently joined to + """ + rooms = yield self.get_rooms_for_user_with_stream_ordering( + user_id, on_invalidate=on_invalidate, + ) defer.returnValue(frozenset(r.room_id for r in rooms)) @cachedInlineCallbacks(max_entries=500000, cache_context=True, iterable=True) From 02a1296ad634ff8200abe539d27a53a6f850081d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:12:08 +0000 Subject: [PATCH 281/348] Fix typo --- synapse/handlers/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 163d80417e..b323e0e6b0 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1390,8 +1390,8 @@ class SyncHandler(object): # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. - for room_id, membeship_stream_ordering in joined_rooms: - if membeship_stream_ordering <= stream_ordering: + for room_id, membership_stream_ordering in joined_rooms: + if membership_stream_ordering <= stream_ordering: joined_room_ids.add(room_id) continue From a56d54dcb7d3d42fc417d6af82a46d86edf6f73d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 5 Mar 2018 13:29:49 +0000 Subject: [PATCH 282/348] Fix up log message --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b323e0e6b0..0f713ce038 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1395,7 +1395,7 @@ class SyncHandler(object): joined_room_ids.add(room_id) continue - logger.info("SH joined_room_ids membership after current token") + logger.info("User joined room after current token: %s", room_id) extrems = yield self.store.get_forward_extremeties_for_room( room_id, stream_ordering, From 20f40348d4ea55cc5b98528673e26bac7396a3cb Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 19:59:24 +0000 Subject: [PATCH 283/348] Factor run_in_background out from preserve_fn It annoys me that we create temporary function objects when there's really no need for it. Let's factor the gubbins out of preserve_fn and start using it. --- docs/log_contexts.rst | 8 +++--- synapse/util/logcontext.py | 51 +++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/docs/log_contexts.rst b/docs/log_contexts.rst index b19b7fa1ea..82ac4f91e5 100644 --- a/docs/log_contexts.rst +++ b/docs/log_contexts.rst @@ -279,9 +279,9 @@ Obviously that option means that the operations done in that might be fixed by setting a different logcontext via a ``with LoggingContext(...)`` in ``background_operation``). -The second option is to use ``logcontext.preserve_fn``, which wraps a function -so that it doesn't reset the logcontext even when it returns an incomplete -deferred, and adds a callback to the returned deferred to reset the +The second option is to use ``logcontext.run_in_background``, which wraps a +function so that it doesn't reset the logcontext even when it returns an +incomplete deferred, and adds a callback to the returned deferred to reset the logcontext. 
In other words, it turns a function that follows the Synapse rules about logcontexts and Deferreds into one which behaves more like an external function — the opposite operation to that described in the previous section. @@ -293,7 +293,7 @@ It can be used like this: def do_request_handling(): yield foreground_operation() - logcontext.preserve_fn(background_operation)() + logcontext.run_in_background(background_operation) # this will now be logged against the request context logger.debug("Request handling complete") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a8dea15c1b..d660ec785b 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -292,36 +292,41 @@ class PreserveLoggingContext(object): def preserve_fn(f): - """Wraps a function, to ensure that the current context is restored after + """Function decorator which wraps the function with run_in_background""" + def g(*args, **kwargs): + return run_in_background(f, *args, **kwargs) + return g + + +def run_in_background(f, *args, **kwargs): + """Calls a function, ensuring that the current context is restored after return from the function, and that the sentinel context is set once the deferred returned by the funtion completes. Useful for wrapping functions that return a deferred which you don't yield on. """ - def g(*args, **kwargs): - current = LoggingContext.current_context() - res = f(*args, **kwargs) - if isinstance(res, defer.Deferred) and not res.called: - # The function will have reset the context before returning, so - # we need to restore it now. - LoggingContext.set_current_context(current) + current = LoggingContext.current_context() + res = f(*args, **kwargs) + if isinstance(res, defer.Deferred) and not res.called: + # The function will have reset the context before returning, so + # we need to restore it now. + LoggingContext.set_current_context(current) - # The original context will be restored when the deferred - # completes, but there is nothing waiting for it, so it will - # get leaked into the reactor or some other function which - # wasn't expecting it. We therefore need to reset the context - # here. - # - # (If this feels asymmetric, consider it this way: we are - # effectively forking a new thread of execution. We are - # probably currently within a ``with LoggingContext()`` block, - # which is supposed to have a single entry and exit point. But - # by spawning off another deferred, we are effectively - # adding a new exit point.) - res.addBoth(_set_context_cb, LoggingContext.sentinel) - return res - return g + # The original context will be restored when the deferred + # completes, but there is nothing waiting for it, so it will + # get leaked into the reactor or some other function which + # wasn't expecting it. We therefore need to reset the context + # here. + # + # (If this feels asymmetric, consider it this way: we are + # effectively forking a new thread of execution. We are + # probably currently within a ``with LoggingContext()`` block, + # which is supposed to have a single entry and exit point. But + # by spawning off another deferred, we are effectively + # adding a new exit point.) 
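As a usage sketch (assuming a Synapse checkout with this patch applied), the old and new spellings side by side; ``run_in_background`` does the same job without building a throwaway wrapper function:

.. code:: python

    from twisted.internet import defer

    from synapse.util import logcontext


    def background_operation():
        # Fire-and-forget work that returns a Deferred.
        return defer.succeed(None)


    # Old spelling: wrap the function, then call the wrapper.
    logcontext.preserve_fn(background_operation)()

    # New spelling: call the helper directly with the function and its args.
    logcontext.run_in_background(background_operation)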
+ res.addBoth(_set_context_cb, LoggingContext.sentinel) + return res def make_deferred_yieldable(deferred): From dbe80a286b85f9427763344c260921745c2ca78d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:17:27 +0000 Subject: [PATCH 284/348] refactor JsonResource rephrase the OPTIONS and unrecognised request handling so that they look similar to the common flow. --- synapse/http/server.py | 84 ++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 165c684d0d..d774476e5b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -276,49 +277,54 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ + callback, group_dict = self._get_handler_for_request(request) + + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback + + request_metrics.name = servlet_classname + + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. + + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in group_dict.items() + }) + + callback_return = yield callback(request, **kwargs) + if callback_return is not None: + code, response = callback_return + self._send_response(request, code, response) + + def _get_handler_for_request(self, request): + """Finds a callback method to handle the given request + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[Callable, dict[str, str]]: callback method, and the dict + mapping keys to path components as specified in the handler's + path match regexp + """ if request.method == "OPTIONS": - self._send_response(request, 200, {}) - return + return _options_handler, {} # Loop through all the registered callbacks to check if the method # and path regex match for path_entry in self.path_regexs.get(request.method, []): m = path_entry.pattern.match(request.path) - if not m: - continue - - # We found a match! First update the metrics object to indicate - # which servlet is handling the request. - - callback = path_entry.callback - - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback - - request_metrics.name = servlet_classname - - # Now trigger the callback. If it returns a response, we send it - # here. If it throws an exception, that is handled by the wrapper - # installed by @request_handler. - - kwargs = intern_dict({ - name: urllib.unquote(value).decode("UTF-8") if value else value - for name, value in m.groupdict().items() - }) - - callback_return = yield callback(request, **kwargs) - if callback_return is not None: - code, response = callback_return - self._send_response(request, code, response) - - return + if m: + # We found a match! + return path_entry.callback, m.groupdict() # Huh. No one wanted to handle that? Fiiiiiine. Send 400. 
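Reduced to a standalone sketch, the lookup introduced by the refactor above is just "OPTIONS, else first matching regex, else the catch-all handler"; the route table and handlers here are made up for illustration:

.. code:: python

    import re


    def options_handler(request):
        return 200, {}


    def unrecognised_request_handler(request):
        raise KeyError("unrecognised request: %s" % (request,))


    ROUTES = {
        "GET": [
            (re.compile(r"^/rooms/(?P<room_id>[^/]+)$"),
             lambda request, room_id: (200, {"room_id": room_id})),
        ],
    }


    def get_handler_for_request(method, path):
        """Return (callback, dict of path groups) for an incoming request."""
        if method == "OPTIONS":
            return options_handler, {}
        for pattern, callback in ROUTES.get(method, []):
            m = pattern.match(path)
            if m:
                return callback, m.groupdict()
        return unrecognised_request_handler, {}


    callback, groups = get_handler_for_request("GET", "/rooms/!abc:hs")
    print(callback(None, **groups))  # (200, {'room_id': '!abc:hs'})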
- request_metrics.name = self.__class__.__name__ + ".UnrecognizedRequest" - raise UnrecognizedRequestError() + return _unrecognised_request_handler, {} def _send_response(self, request, code, response_json_object, response_code_message=None): @@ -335,6 +341,14 @@ class JsonResource(HttpServer, resource.Resource): ) +def _options_handler(request): + return {} + + +def _unrecognised_request_handler(request): + raise UnrecognizedRequestError() + + class RequestMetrics(object): def start(self, clock, name): self.start = clock.time_msec() From 88541f9009a7ca39c85cac7483d6a240ef497d33 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 16:19:18 +0000 Subject: [PATCH 285/348] Add a metric which increments when a request is received It's useful to know when there are peaks in incoming requests - which isn't quite the same as there being peaks in outgoing responses, due to the time taken to handle requests. --- synapse/http/server.py | 12 ++++++++++-- synapse/metrics/__init__.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index d774476e5b..6c5d8bb556 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -60,6 +60,11 @@ response_count = metrics.register_counter( ) ) +requests_counter = metrics.register_counter( + "requests_received", + labels=["method", "servlet", ], +) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], @@ -146,7 +151,8 @@ def wrap_request_handler(request_handler, include_metrics=False): # at the servlet name. For most requests that name will be # JsonResource (or a subclass), and JsonResource._async_render # will update it once it picks a servlet. - request_metrics.start(self.clock, name=self.__class__.__name__) + servlet_name = self.__class__.__name__ + request_metrics.start(self.clock, name=servlet_name) request_context.request = request_id with request.processing(): @@ -155,6 +161,7 @@ def wrap_request_handler(request_handler, include_metrics=False): if include_metrics: yield request_handler(self, request, request_metrics) else: + requests_counter.inc(request.method, servlet_name) yield request_handler(self, request) except CodeMessageException as e: code = e.code @@ -286,6 +293,7 @@ class JsonResource(HttpServer, resource.Resource): servlet_classname = "%r" % callback request_metrics.name = servlet_classname + requests_counter.inc(request.method, servlet_classname) # Now trigger the callback. If it returns a response, we send it # here. 
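The new ``requests_received`` counter above goes through the existing labelled-counter helpers; a reduced sketch of that pattern, based on the calls shown in this patch (the servlet name passed to ``inc`` is illustrative):

.. code:: python

    import synapse.metrics

    metrics = synapse.metrics.get_metrics_for(__name__)

    requests_counter = metrics.register_counter(
        "requests_received",
        labels=["method", "servlet"],
    )

    # Incremented once per incoming request, before the handler runs, so
    # peaks in arrivals show up even while responses are still pending.
    requests_counter.inc("GET", "RoomStateRestServlet")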
If it throws an exception, that is handled by the wrapper @@ -342,7 +350,7 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): - return {} + return 200, {} def _unrecognised_request_handler(request): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e0cfb7d08f..50d99d7a5c 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -57,15 +57,31 @@ class Metrics(object): return metric def register_counter(self, *args, **kwargs): + """ + Returns: + CounterMetric + """ return self._register(CounterMetric, *args, **kwargs) def register_callback(self, *args, **kwargs): + """ + Returns: + CallbackMetric + """ return self._register(CallbackMetric, *args, **kwargs) def register_distribution(self, *args, **kwargs): + """ + Returns: + DistributionMetric + """ return self._register(DistributionMetric, *args, **kwargs) def register_cache(self, *args, **kwargs): + """ + Returns: + CacheMetric + """ return self._register(CacheMetric, *args, **kwargs) From 58dd148c4f67e1cb6b150bf43a437b33089cfe5e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 9 Mar 2018 18:05:41 +0000 Subject: [PATCH 286/348] Add some docstrings to help figure this out --- synapse/http/server.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 6c5d8bb556..4b567215c8 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -237,7 +237,7 @@ class JsonResource(HttpServer, resource.Resource): """ This implements the HttpServer interface and provides JSON support for Resources. - Register callbacks via register_path() + Register callbacks via register_paths() Callbacks can return a tuple of status code and a dict in which case the the dict will automatically be sent to the client as a JSON object. @@ -318,7 +318,11 @@ class JsonResource(HttpServer, resource.Resource): Returns: Tuple[Callable, dict[str, str]]: callback method, and the dict mapping keys to path components as specified in the handler's - path match regexp + path match regexp. + + The callback will normally be a method registered via + register_paths, so will return (possibly via Deferred) either + None, or a tuple of (http code, response body). """ if request.method == "OPTIONS": return _options_handler, {} @@ -350,10 +354,30 @@ class JsonResource(HttpServer, resource.Resource): def _options_handler(request): + """Request handler for OPTIONS requests + + This is a request handler suitable for return from + _get_handler_for_request. It returns a 200 and an empty body. + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[int, dict]: http code, response body. + """ return 200, {} def _unrecognised_request_handler(request): + """Request handler for unrecognised requests + + This is a request handler suitable for return from + _get_handler_for_request. It actually just raises an + UnrecognizedRequestError. + + Args: + request (twisted.web.http.Request): + """ raise UnrecognizedRequestError() From 1708412f569dc28931a3704d679b41b92ac788b9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 7 Mar 2018 17:32:46 +0000 Subject: [PATCH 287/348] Return an error when doing two purges on a room Queuing up purges doesn't sound like a good thing. 
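The change that follows boils down to a per-room "already running" guard; roughly, as a simplified standalone sketch rather than the handler code itself:

.. code:: python

    _purges_in_progress_by_room = set()


    def purge_history(room_id, do_purge):
        # Reject concurrent purges for the same room rather than queuing
        # them up behind the pagination lock.
        if room_id in _purges_in_progress_by_room:
            raise RuntimeError(
                "History purge already in progress for %s" % (room_id,)
            )
        _purges_in_progress_by_room.add(room_id)
        try:
            return do_purge(room_id)
        finally:
            _purges_in_progress_by_room.discard(room_id)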
--- synapse/handlers/message.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index dd00d8a86c..6eb8d19dc9 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -50,15 +50,26 @@ class MessageHandler(BaseHandler): self.clock = hs.get_clock() self.pagination_lock = ReadWriteLock() + self._purges_in_progress_by_room = set() @defer.inlineCallbacks def purge_history(self, room_id, topological_ordering, delete_local_events=False): - with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history( - room_id, topological_ordering, delete_local_events, + if room_id in self._purges_in_progress_by_room: + raise SynapseError( + 400, + "History purge already in progress for %s" % (room_id, ), ) + self._purges_in_progress_by_room.add(room_id) + try: + with (yield self.pagination_lock.write(room_id)): + yield self.store.purge_history( + room_id, topological_ordering, delete_local_events, + ) + finally: + self._purges_in_progress_by_room.discard(room_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): From e48c7aac4d827b66182adf80ab9804f42db186c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 8 Mar 2018 11:47:28 +0000 Subject: [PATCH 288/348] Add transactional API to history purge Make the purge request return quickly, and allow scripts to poll for updates. --- docs/admin_api/purge_history_api.rst | 27 +++++++ synapse/handlers/message.py | 104 +++++++++++++++++++++++++-- synapse/rest/client/v1/admin.py | 38 +++++++++- 3 files changed, 161 insertions(+), 8 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index acf1bc5749..ea2922da5c 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -32,3 +32,30 @@ specified by including an event_id in the URI, or by setting a id is given, that event (and others at the same graph depth) will be retained. If ``purge_up_to_ts`` is given, it should be a timestamp since the unix epoch, in milliseconds. + +The API starts the purge running, and returns immediately with a JSON body with +a purge id: + +.. code:: json + + { + "purge_id": "" + } + +Purge status query +------------------ + +It is possible to poll for updates on recent purges with a second API; + +``GET /_matrix/client/r0/admin/purge_history_status/`` + +(again, with a suitable ``access_token``). This API returns a JSON body like +the following: + +.. code:: json + + { + "status": "active" + } + +The status will be one of ``active``, ``complete``, or ``failed``. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6eb8d19dc9..42aab91c50 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -13,7 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
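As a client-side sketch of the asynchronous API documented above (homeserver URL, room id, timestamp and admin token are all placeholders; assumes the ``requests`` package):

.. code:: python

    import time

    import requests

    BASE = "https://hs.example.com/_matrix/client/r0/admin"
    PARAMS = {"access_token": "<admin access token>"}

    # Start the purge; the response now comes back immediately with an id.
    resp = requests.post(
        BASE + "/purge_history/%21someroom%3Aexample.com",
        params=PARAMS,
        json={"delete_local_events": True, "purge_up_to_ts": 1520000000000},
    )
    purge_id = resp.json()["purge_id"]

    # Poll the status endpoint until the purge is no longer active.
    while True:
        status = requests.get(
            BASE + "/purge_history_status/" + purge_id, params=PARAMS,
        ).json()["status"]
        if status != "active":
            break
        time.sleep(5)

    print("purge %s finished: %s" % (purge_id, status))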
-from twisted.internet import defer +from twisted.internet import defer, reactor +from twisted.python.failure import Failure from synapse.api.constants import EventTypes, Membership from synapse.api.errors import AuthError, Codes, SynapseError @@ -24,9 +25,10 @@ from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter -from synapse.util.logcontext import preserve_fn +from synapse.util.logcontext import preserve_fn, run_in_background from synapse.util.metrics import measure_func from synapse.util.frozenutils import unfreeze +from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client from synapse.replication.http.send_event import send_event_to_master @@ -41,6 +43,36 @@ import ujson logger = logging.getLogger(__name__) +class PurgeStatus(object): + """Object tracking the status of a purge request + + This class contains information on the progress of a purge request, for + return by get_purge_status. + + Attributes: + status (int): Tracks whether this request has completed. One of + STATUS_{ACTIVE,COMPLETE,FAILED} + """ + + STATUS_ACTIVE = 0 + STATUS_COMPLETE = 1 + STATUS_FAILED = 2 + + STATUS_TEXT = { + STATUS_ACTIVE: "active", + STATUS_COMPLETE: "complete", + STATUS_FAILED: "failed", + } + + def __init__(self): + self.status = PurgeStatus.STATUS_ACTIVE + + def asdict(self): + return { + "status": PurgeStatus.STATUS_TEXT[self.status] + } + + class MessageHandler(BaseHandler): def __init__(self, hs): @@ -51,25 +83,87 @@ class MessageHandler(BaseHandler): self.pagination_lock = ReadWriteLock() self._purges_in_progress_by_room = set() + # map from purge id to PurgeStatus + self._purges_by_id = {} - @defer.inlineCallbacks - def purge_history(self, room_id, topological_ordering, - delete_local_events=False): + def start_purge_history(self, room_id, topological_ordering, + delete_local_events=False): + """Start off a history purge on a room. + + Args: + room_id (str): The room to purge from + + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + str: unique ID for this purge transaction. + """ if room_id in self._purges_in_progress_by_room: raise SynapseError( 400, "History purge already in progress for %s" % (room_id, ), ) + purge_id = random_string(16) + + # we log the purge_id here so that it can be tied back to the + # request id in the log lines. + logger.info("[purge] starting purge_id %s", purge_id) + + self._purges_by_id[purge_id] = PurgeStatus() + run_in_background( + self._purge_history, + purge_id, room_id, topological_ordering, delete_local_events, + ) + return purge_id + + @defer.inlineCallbacks + def _purge_history(self, purge_id, room_id, topological_ordering, + delete_local_events): + """Carry out a history purge on a room. 
+ + Args: + purge_id (str): The id for this purge + room_id (str): The room to purge from + topological_ordering (int): minimum topo ordering to preserve + delete_local_events (bool): True to delete local events as well as + remote ones + + Returns: + Deferred + """ self._purges_in_progress_by_room.add(room_id) try: with (yield self.pagination_lock.write(room_id)): yield self.store.purge_history( room_id, topological_ordering, delete_local_events, ) + logger.info("[purge] complete") + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + except Exception: + logger.error("[purge] failed: %s", Failure().getTraceback().rstrip()) + self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED finally: self._purges_in_progress_by_room.discard(room_id) + # remove the purge from the list 24 hours after it completes + def clear_purge(): + del self._purges_by_id[purge_id] + reactor.callLater(24 * 3600, clear_purge) + + def get_purge_status(self, purge_id): + """Get the current status of an active purge + + Args: + purge_id (str): purge_id returned by start_purge_history + + Returns: + PurgeStatus|None + """ + return self._purges_by_id.get(purge_id) + @defer.inlineCallbacks def get_messages(self, requester, room_id=None, pagin_config=None, as_client_event=True, event_filter=None): diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index dcf6215dad..303419d281 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,7 +17,7 @@ from twisted.internet import defer from synapse.api.constants import Membership -from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.api.errors import AuthError, SynapseError, Codes, NotFoundError from synapse.types import UserID, create_requester from synapse.http.servlet import parse_json_object_from_request @@ -185,12 +185,43 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): errcode=Codes.BAD_JSON, ) - yield self.handlers.message_handler.purge_history( + purge_id = yield self.handlers.message_handler.start_purge_history( room_id, depth, delete_local_events=delete_local_events, ) - defer.returnValue((200, {})) + defer.returnValue((200, { + "purge_id": purge_id, + })) + + +class PurgeHistoryStatusRestServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns( + "/admin/purge_history_status/(?P[^/]+)" + ) + + def __init__(self, hs): + """ + + Args: + hs (synapse.server.HomeServer) + """ + super(PurgeHistoryStatusRestServlet, self).__init__(hs) + self.handlers = hs.get_handlers() + + @defer.inlineCallbacks + def on_GET(self, request, purge_id): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + purge_status = self.handlers.message_handler.get_purge_status(purge_id) + if purge_status is None: + raise NotFoundError("purge id '%s' not found" % purge_id) + + defer.returnValue((200, purge_status.asdict())) class DeactivateAccountRestServlet(ClientV1RestServlet): @@ -561,6 +592,7 @@ class SearchUsersRestServlet(ClientV1RestServlet): def register_servlets(hs, http_server): WhoisRestServlet(hs).register(http_server) PurgeMediaCacheRestServlet(hs).register(http_server) + PurgeHistoryStatusRestServlet(hs).register(http_server) DeactivateAccountRestServlet(hs).register(http_server) PurgeHistoryRestServlet(hs).register(http_server) UsersRestServlet(hs).register(http_server) From 889a2a853a83057d4f218bb828bf686db786d590 Mon Sep 17 00:00:00 
2001 From: Richard van der Hoff Date: Tue, 13 Mar 2018 09:57:54 +0000 Subject: [PATCH 289/348] Add Measure block for persist_events This seems like a useful thing to measure. --- synapse/storage/events.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 826fad307e..3890878170 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -283,10 +283,11 @@ class EventsStore(EventsWorkerStore): def _maybe_start_persisting(self, room_id): @defer.inlineCallbacks def persisting_queue(item): - yield self._persist_events( - item.events_and_contexts, - backfilled=item.backfilled, - ) + with Measure(self._clock, "persist_events"): + yield self._persist_events( + item.events_and_contexts, + backfilled=item.backfilled, + ) self._event_persist_queue.handle_queue(room_id, persisting_queue) From c3f79c9da56931453ab86a4c726da5a02f18fe1e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 16:17:08 +0000 Subject: [PATCH 290/348] Split out edu/query registration to a separate class --- synapse/federation/federation_server.py | 117 ++++++++++++++---------- synapse/handlers/device.py | 6 +- synapse/handlers/devicemessage.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/presence.py | 10 +- synapse/handlers/profile.py | 2 +- synapse/handlers/receipts.py | 2 +- synapse/handlers/typing.py | 2 +- synapse/server.py | 5 + 10 files changed, 90 insertions(+), 60 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9849953c9b..5b1914f2f4 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -17,7 +17,7 @@ import logging import simplejson as json from twisted.internet import defer -from synapse.api.errors import AuthError, FederationError, SynapseError +from synapse.api.errors import AuthError, FederationError, SynapseError, NotFoundError from synapse.crypto.event_signing import compute_event_signature from synapse.federation.federation_base import ( FederationBase, @@ -56,6 +56,8 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.registry = hs.get_federation_registry() + # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) @@ -67,35 +69,6 @@ class FederationServer(FederationBase): """ self.handler = handler - def register_edu_handler(self, edu_type, handler): - if edu_type in self.edu_handlers: - raise KeyError("Already have an EDU handler for %s" % (edu_type,)) - - self.edu_handlers[edu_type] = handler - - def register_query_handler(self, query_type, handler): - """Sets the handler callable that will be used to handle an incoming - federation Query of the given type. - - Args: - query_type (str): Category name of the query, which should match - the string used by make_query. - handler (callable): Invoked to handle incoming queries of this type - - handler is invoked as: - result = handler(args) - - where 'args' is a dict mapping strings to strings of the query - arguments. It should return a Deferred that will eventually yield an - object to encode as JSON. 
- """ - if query_type in self.query_handlers: - raise KeyError( - "Already have a Query handler for %s" % (query_type,) - ) - - self.query_handlers[query_type] = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): @@ -229,16 +202,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def received_edu(self, origin, edu_type, content): received_edus_counter.inc() - - if edu_type in self.edu_handlers: - try: - yield self.edu_handlers[edu_type](origin, content) - except SynapseError as e: - logger.info("Failed to handle edu %r: %r", edu_type, e) - except Exception as e: - logger.exception("Failed to handle edu %r", edu_type) - else: - logger.warn("Received EDU of type %s with no handler", edu_type) + yield self.registry.on_edu(edu_type, origin, content) @defer.inlineCallbacks @log_function @@ -328,14 +292,8 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_query_request(self, query_type, args): received_queries_counter.inc(query_type) - - if query_type in self.query_handlers: - response = yield self.query_handlers[query_type](args) - defer.returnValue((200, response)) - else: - defer.returnValue( - (404, "No handler for Query type '%s'" % (query_type,)) - ) + resp = yield self.registry.on_query(query_type, args) + defer.returnValue((200, resp)) @defer.inlineCallbacks def on_make_join_request(self, room_id, user_id): @@ -607,3 +565,66 @@ class FederationServer(FederationBase): origin, room_id, event_dict ) defer.returnValue(ret) + + +class FederationHandlerRegistry(object): + """Allows classes to register themselves as handlers for a given EDU or + query type for incoming federation traffic. + """ + def __init__(self): + self.edu_handlers = {} + self.query_handlers = {} + + def register_edu_handler(self, edu_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation EDU of the given type. + + Args: + edu_type (str): The type of the incoming EDU to register handler for + handler (Callable[str, dict]): A callable invoked on incoming EDU + of the given type. The arguments are the origin server name and + the EDU contents. + """ + if edu_type in self.edu_handlers: + raise KeyError("Already have an EDU handler for %s" % (edu_type,)) + + self.edu_handlers[edu_type] = handler + + def register_query_handler(self, query_type, handler): + """Sets the handler callable that will be used to handle an incoming + federation query of the given type. + + Args: + query_type (str): Category name of the query, which should match + the string used by make_query. + handler (Callable[dict] -> Deferred[dict]): Invoked to handle + incoming queries of this type. The return will be yielded + on and the result used as the response to the query request. 
+ """ + if query_type in self.query_handlers: + raise KeyError( + "Already have a Query handler for %s" % (query_type,) + ) + + self.query_handlers[query_type] = handler + + @defer.inlineCallbacks + def on_edu(self, edu_type, origin, content): + handler = self.edu_handlers.get(edu_type) + if not handler: + logger.warn("No handler registered for EDU type %s", edu_type) + + try: + yield handler(origin, content) + except SynapseError as e: + logger.info("Failed to handle edu %r: %r", edu_type, e) + except Exception as e: + logger.exception("Failed to handle edu %r", edu_type) + + def on_query(self, query_type, args): + handler = self.query_handlers.get(query_type) + if not handler: + logger.warn("No handler registered for query type %s", query_type) + raise NotFoundError("No handler for Query type '%s'" % (query_type,)) + + return handler(args) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 0e83453851..9e58dbe64e 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -41,10 +41,12 @@ class DeviceHandler(BaseHandler): self._edu_updater = DeviceListEduUpdater(hs, self) - self.federation.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.device_list_update", self._edu_updater.incoming_device_list_update, ) - self.federation.register_query_handler( + federation_registry.register_query_handler( "user_devices", self.on_federation_query_user_devices, ) diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index d996aa90bb..f147a20b73 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -37,7 +37,7 @@ class DeviceMessageHandler(object): self.is_mine = hs.is_mine self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.direct_to_device", self.on_direct_to_device_edu ) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8580ada60a..e955cb1f3c 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -37,7 +37,7 @@ class DirectoryHandler(BaseHandler): self.event_creation_handler = hs.get_event_creation_handler() self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 9aa95f89e6..57f50a4e27 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -40,7 +40,7 @@ class E2eKeysHandler(object): # doesn't really work as part of the generic query API, because the # query request requires an object POST, but we abuse the # "query handler" interface. 
- self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "client_keys", self.on_federation_query_client_keys ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index cb158ba962..b11ae78350 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -98,24 +98,26 @@ class PresenceHandler(object): self.state = hs.get_state_handler() - self.replication.register_edu_handler( + federation_registry = hs.get_federation_registry() + + federation_registry.register_edu_handler( "m.presence", self.incoming_presence ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_invite", lambda origin, content: self.invite_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_accept", lambda origin, content: self.accept_presence( observed_user=UserID.from_string(content["observed_user"]), observer_user=UserID.from_string(content["observer_user"]), ) ) - self.replication.register_edu_handler( + federation_registry.register_edu_handler( "m.presence_deny", lambda origin, content: self.deny_presence( observed_user=UserID.from_string(content["observed_user"]), diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c9c2879038..c386c79bbd 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -32,7 +32,7 @@ class ProfileHandler(BaseHandler): super(ProfileHandler, self).__init__(hs) self.federation = hs.get_replication_layer() - self.federation.register_query_handler( + hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 0525765272..3f215c2b4e 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -35,7 +35,7 @@ class ReceiptsHandler(BaseHandler): self.store = hs.get_datastore() self.hs = hs self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler( + hs.get_federation_registry().register_edu_handler( "m.receipt", self._received_remote_receipt ) self.clock = self.hs.get_clock() diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 82dedbbc99..77c0cf146f 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -56,7 +56,7 @@ class TypingHandler(object): self.federation = hs.get_federation_sender() - hs.get_replication_layer().register_edu_handler("m.typing", self._recv_edu) + hs.get_federation_registry().register_edu_handler("m.typing", self._recv_edu) hs.get_distributor().observe("user_left_room", self.user_left_room) diff --git a/synapse/server.py b/synapse/server.py index 5b6effbe31..1bc8d6f702 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -34,6 +34,7 @@ from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker from synapse.federation import initialize_http_replication from synapse.federation.send_queue import FederationRemoteSendQueue +from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient from synapse.federation.transaction_queue import TransactionQueue from synapse.handlers import Handlers @@ -147,6 +148,7 @@ class HomeServer(object): 'groups_attestation_renewer', 'spam_checker', 'room_member_handler', + 
'federation_registry', ] def __init__(self, hostname, **kwargs): @@ -387,6 +389,9 @@ class HomeServer(object): def build_room_member_handler(self): return RoomMemberHandler(self) + def build_federation_registry(self): + return FederationHandlerRegistry() + def remove_pusher(self, app_id, push_key, user_id): return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) From 631a73f7ef7d89c43be47cf01c7c27a1d633052d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 10:39:19 +0000 Subject: [PATCH 291/348] Fix tests --- tests/handlers/test_directory.py | 9 ++++----- tests/handlers/test_profile.py | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index 5712773909..b103921498 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -35,21 +35,20 @@ class DirectoryTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index a5f47181d7..73223ffbd3 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -37,23 +37,22 @@ class ProfileTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.mock_federation = Mock(spec=[ - "make_query", - "register_edu_handler", - ]) + self.mock_federation = Mock() + self.mock_registry = Mock() self.query_handlers = {} def register_query_handler(query_type, handler): self.query_handlers[query_type] = handler - self.mock_federation.register_query_handler = register_query_handler + self.mock_registry.register_query_handler = register_query_handler hs = yield setup_test_homeserver( http_client=None, handlers=None, resource_for_federation=Mock(), replication_layer=self.mock_federation, + federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", ]) From e05bf34117de19705b36a4803085ea93f7381928 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:07:39 +0000 Subject: [PATCH 292/348] Move property setting from ReplicationLayer to FederationBase --- synapse/federation/federation_base.py | 6 ++++++ synapse/federation/federation_client.py | 1 + synapse/federation/federation_server.py | 6 ++++++ synapse/federation/replication.py | 22 ---------------------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 7918d3e442..79eaa31031 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -27,7 +27,13 @@ logger = logging.getLogger(__name__) class FederationBase(object): def __init__(self, hs): + self.hs = hs + + self.server_name = hs.hostname + self.keyring = hs.get_keyring() self.spam_checker = hs.get_spam_checker() + self.store = hs.get_datastore() + self._clock = hs.get_clock() @defer.inlineCallbacks def 
_check_sigs_and_hash_and_fetch(self, origin, pdus, outlier=False, diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 813907f7f2..38440da5b5 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -58,6 +58,7 @@ class FederationClient(FederationBase): self._clear_tried_cache, 60 * 1000, ) self.state = hs.get_state_handler() + self.transport_layer = hs.get_federation_transport_client() def _clear_tried_cache(self): """Clear pdu_destination_tried cache""" diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f4..dd73fc50b2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -23,6 +23,8 @@ from synapse.federation.federation_base import ( FederationBase, event_from_pdu_json, ) + +from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction import synapse.metrics from synapse.types import get_domain_from_id @@ -56,6 +58,10 @@ class FederationServer(FederationBase): self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") + self.transaction_actions = TransactionActions(self.store) + + self.handler = None + self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py index 62d865ec4b..b8b3a3f933 100644 --- a/synapse/federation/replication.py +++ b/synapse/federation/replication.py @@ -20,8 +20,6 @@ a given transport. from .federation_client import FederationClient from .federation_server import FederationServer -from .persistence import TransactionActions - import logging @@ -47,26 +45,6 @@ class ReplicationLayer(FederationClient, FederationServer): """ def __init__(self, hs, transport_layer): - self.server_name = hs.hostname - - self.keyring = hs.get_keyring() - - self.transport_layer = transport_layer - - self.federation_client = self - - self.store = hs.get_datastore() - - self.handler = None - self.edu_handlers = {} - self.query_handlers = {} - - self._clock = hs.get_clock() - - self.transaction_actions = TransactionActions(self.store) - - self.hs = hs - super(ReplicationLayer, self).__init__(hs) def __str__(self): From 265b993b8afd2501b2aa3a50670f39d6d97eddb7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 12 Mar 2018 14:34:31 +0000 Subject: [PATCH 293/348] Split replication layer into two --- synapse/app/homeserver.py | 2 +- synapse/federation/federation_server.py | 10 +--------- synapse/federation/transport/server.py | 2 +- synapse/handlers/device.py | 3 +-- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 4 +--- synapse/handlers/presence.py | 1 - synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/handlers/room_member.py | 3 +-- synapse/server.py | 13 +++++++++---- 12 files changed, 19 insertions(+), 27 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e375f2bbcf..503f461ab4 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_layer().start_get_pdu_cache() + 
hs.get_replication_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index dd73fc50b2..740ef96280 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -54,27 +54,19 @@ class FederationServer(FederationBase): super(FederationServer, self).__init__(hs) self.auth = hs.get_auth() + self.handler = hs.get_handlers().federation_handler self._server_linearizer = async.Linearizer("fed_server") self._transaction_linearizer = async.Linearizer("fed_txn_handler") self.transaction_actions = TransactionActions(self.store) - self.handler = None - self.registry = hs.get_federation_registry() # We cache responses to state queries, as they take a while and often # come in waves. self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) - def set_handler(self, handler): - """Sets the handler that the replication layer will use to communicate - receipt of new PDUs from other home servers. The required methods are - documented on :py:class:`.ReplicationHandler`. - """ - self.handler = handler - @defer.inlineCallbacks @log_function def on_backfill_request(self, origin, room_id, versions, limit): diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 06c16ba4fa..04b83e691a 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_layer(), + handler=hs.get_replication_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9e58dbe64e..fcf41630d6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,7 +37,6 @@ class DeviceHandler(BaseHandler): self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() self.federation_sender = hs.get_federation_sender() - self.federation = hs.get_replication_layer() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -432,7 +431,7 @@ class DeviceListEduUpdater(object): def __init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index e955cb1f3c..dfe04eb1c1 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 57f50a4e27..0ca8d036ee 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() self.device_handler = 
hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 520612683e..cfd4379160 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_layer() + self.replication_layer = hs.get_replication_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() @@ -78,8 +78,6 @@ class FederationHandler(BaseHandler): self.spam_checker = hs.get_spam_checker() self.event_creation_handler = hs.get_event_creation_handler() - self.replication_layer.set_handler(self) - # When joining a room we need to queue any events for that room up self.room_queues = {} self._room_pdu_linearizer = Linearizer("fed_room_pdu") diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index b11ae78350..a5e501897c 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -93,7 +93,6 @@ class PresenceHandler(object): self.store = hs.get_datastore() self.wheel_timer = WheelTimer() self.notifier = hs.get_notifier() - self.replication = hs.get_replication_layer() self.federation = hs.get_federation_sender() self.state = hs.get_state_handler() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index c386c79bbd..0cfac60d74 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ class ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_layer() + self.federation = hs.get_replication_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index dfa09141ed..f79bd8902f 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_layer() + repl_layer = self.hs.get_replication_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730d..e2f0527712 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -55,7 +55,6 @@ class RoomMemberHandler(object): self.registration_handler = hs.get_handlers().registration_handler self.profile_handler = hs.get_profile_handler() self.event_creation_hander = hs.get_event_creation_handler() - self.replication_layer = hs.get_replication_layer() self.member_linearizer = Linearizer(name="member") @@ -212,7 +211,7 @@ class RoomMemberHandler(object): # if this is a join with a 3pid signature, we may need to turn a 3pid # invite into a normal invite before we can handle the join. 
if third_party_signed is not None: - yield self.replication_layer.exchange_third_party_invite( + yield self.federation_handler.exchange_third_party_invite( third_party_signed["sender"], target.to_string(), room_id, diff --git a/synapse/server.py b/synapse/server.py index 1bc8d6f702..894e9c2acf 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -32,7 +32,8 @@ from synapse.appservice.scheduler import ApplicationServiceScheduler from synapse.crypto.keyring import Keyring from synapse.events.builder import EventBuilderFactory from synapse.events.spamcheck import SpamChecker -from synapse.federation import initialize_http_replication +from synapse.federation.federation_client import FederationClient +from synapse.federation.federation_server import FederationServer from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.federation_server import FederationHandlerRegistry from synapse.federation.transport.client import TransportLayerClient @@ -100,7 +101,8 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 'replication_layer', + 'replication_client', + 'replication_server', 'handlers', 'v1auth', 'auth', @@ -197,8 +199,11 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_layer(self): - return initialize_http_replication(self) + def build_replication_client(self): + return FederationClient(self) + + def build_replication_server(self): + return FederationServer(self) def build_handlers(self): return Handlers(self) From 6ea27fafad7c290b8f082fedfa8ff7948cf9f1fd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 10:15:49 +0000 Subject: [PATCH 294/348] Fix tests --- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 3 ++- tests/handlers/test_typing.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 10 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b103921498..b4f36b27a6 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + replication_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index d92bf240b1..fe73f2b96c 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_layer=mock.Mock(), + replication_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 73223ffbd3..c690437682 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,8 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_layer=self.mock_federation, + 
replication_client=self.mock_federation, + replication_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index fcd380b03a..a433bbfa8a 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -81,7 +81,7 @@ class TypingNotificationsTestCase(unittest.TestCase): "get_current_state_deltas", ]), state_handler=self.state_handler, - handlers=None, + handlers=Mock(), notifier=mock_notifier, resource_for_client=Mock(), resource_for_federation=self.mock_federation_resource, diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 74f104e3b8..ceffdaad54 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index e9698bfdc9..f04bf7dfde 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index dddcf51b69..feddcf024e 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_layer=Mock(), + replication_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 9f37255381..2c0708b0d8 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) 
self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index e46534cd35..62639e3adc 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_layer=Mock(), + replication_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 13d81f972b..cc0df7f662 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_layer=Mock(), + replication_client=Mock(), ) with self.assertRaises(ConfigError) as cm: From ea7b3c4b1b829703a780418e6bb6b7860a5b5451 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:00:04 +0000 Subject: [PATCH 295/348] Remove unused ReplicationLayer --- synapse/federation/__init__.py | 8 ----- synapse/federation/replication.py | 51 ------------------------------- 2 files changed, 59 deletions(-) delete mode 100644 synapse/federation/replication.py diff --git a/synapse/federation/__init__.py b/synapse/federation/__init__.py index 2e32d245ba..f5f0bdfca3 100644 --- a/synapse/federation/__init__.py +++ b/synapse/federation/__init__.py @@ -15,11 +15,3 @@ """ This package includes all the federation specific logic. 
""" - -from .replication import ReplicationLayer - - -def initialize_http_replication(hs): - transport = hs.get_federation_transport_client() - - return ReplicationLayer(hs, transport) diff --git a/synapse/federation/replication.py b/synapse/federation/replication.py deleted file mode 100644 index b8b3a3f933..0000000000 --- a/synapse/federation/replication.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This layer is responsible for replicating with remote home servers using -a given transport. -""" - -from .federation_client import FederationClient -from .federation_server import FederationServer - -import logging - - -logger = logging.getLogger(__name__) - - -class ReplicationLayer(FederationClient, FederationServer): - """This layer is responsible for replicating with remote home servers over - the given transport. I.e., does the sending and receiving of PDUs to - remote home servers. - - The layer communicates with the rest of the server via a registered - ReplicationHandler. - - In more detail, the layer: - * Receives incoming data and processes it into transactions and pdus. - * Fetches any PDUs it thinks it might have missed. - * Keeps the current state for contexts up to date by applying the - suitable conflict resolution. - * Sends outgoing pdus wrapped in transactions. - * Fills out the references to previous pdus/transactions appropriately - for outgoing data. 
- """ - - def __init__(self, hs, transport_layer): - super(ReplicationLayer, self).__init__(hs) - - def __str__(self): - return "" % self.server_name From d023ecb8109718cd95c4dd86e916a459fc610547 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:08:10 +0000 Subject: [PATCH 296/348] Don't build handlers on workers unnecessarily --- synapse/app/client_reader.py | 1 - synapse/app/event_creator.py | 1 - synapse/app/federation_reader.py | 1 - synapse/app/frontend_proxy.py | 1 - synapse/app/media_repository.py | 1 - 5 files changed, 5 deletions(-) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 3b3352798d..0a8ce9bc66 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -156,7 +156,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index fc0b9e8c04..eb593c5278 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -161,7 +161,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 4de43c41f0..20d157911b 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -144,7 +144,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index e32ee8fe93..816c080d18 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -211,7 +211,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 1ed1ca8772..84c5791b3b 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -158,7 +158,6 @@ def start(config_options): ) ss.setup() - ss.get_handlers() ss.start_listening(config.worker_listeners) def start(): From 31becf4ac3a1c8c675ecab481b07cffb9aa24fd8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 10:28:52 +0000 Subject: [PATCH 297/348] Make functions private that can be --- synapse/handlers/room_member.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730d..2a6b7e9f8c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -138,7 +138,7 @@ class RoomMemberHandler(object): defer.returnValue(event) @defer.inlineCallbacks - def remote_join(self, remote_room_hosts, room_id, user, content): + def _remote_join(self, remote_room_hosts, room_id, user, content): if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -292,7 +292,7 @@ class RoomMemberHandler(object): raise AuthError(403, "Guest access not allowed") if not is_host_in_room: - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if inviter and not self.hs.is_mine(inviter): remote_room_hosts.append(inviter.domain) @@ -306,7 +306,7 @@ class RoomMemberHandler(object): if requester.is_guest: content["kind"] = "guest" - ret = yield self.remote_join( + ret = yield self._remote_join( remote_room_hosts, room_id, target, content ) defer.returnValue(ret) 
@@ -314,7 +314,7 @@ class RoomMemberHandler(object): elif effective_membership_state == Membership.LEAVE: if not is_host_in_room: # perhaps we've been invited - inviter = yield self.get_inviter(target.to_string(), room_id) + inviter = yield self._get_inviter(target.to_string(), room_id) if not inviter: raise SynapseError(404, "Not a known room") @@ -496,7 +496,7 @@ class RoomMemberHandler(object): defer.returnValue((RoomID.from_string(room_id), servers)) @defer.inlineCallbacks - def get_inviter(self, user_id, room_id): + def _get_inviter(self, user_id, room_id): invite = yield self.store.get_invite_for_user_in_room( user_id=user_id, room_id=room_id, @@ -573,7 +573,7 @@ class RoomMemberHandler(object): if "mxid" in data: if "signatures" not in data: raise AuthError(401, "No signatures on 3pid binding") - yield self.verify_any_signature(data, id_server) + yield self._verify_any_signature(data, id_server) defer.returnValue(data["mxid"]) except IOError as e: @@ -581,7 +581,7 @@ class RoomMemberHandler(object): defer.returnValue(None) @defer.inlineCallbacks - def verify_any_signature(self, data, server_hostname): + def _verify_any_signature(self, data, server_hostname): if server_hostname not in data["signatures"]: raise AuthError(401, "No signature from server %s" % (server_hostname,)) for key_name, signature in data["signatures"][server_hostname].items(): From d0fcc48f9dfc09531619faf23d407807eec46df9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 17:39:58 +0000 Subject: [PATCH 298/348] extra_users is actually a list of UserIDs --- synapse/handlers/message.py | 2 +- synapse/replication/http/send_event.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 42aab91c50..4f97c8db79 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -667,7 +667,7 @@ class EventCreationHandler(object): event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ try: diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 70f2fe456a..bbe2f967b7 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -25,7 +25,7 @@ from synapse.util.async import sleep from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure -from synapse.types import Requester +from synapse.types import Requester, UserID import logging import re @@ -46,7 +46,7 @@ def send_event_to_master(client, host, port, requester, event, context, event (FrozenEvent) context (EventContext) ratelimit (bool) - extra_users (list(str)): Any extra users to notify about event + extra_users (list(UserID)): Any extra users to notify about event """ uri = "http://%s:%s/_synapse/replication/send_event/%s" % ( host, port, event.event_id, @@ -59,7 +59,7 @@ def send_event_to_master(client, host, port, requester, event, context, "context": context.serialize(event), "requester": requester.serialize(), "ratelimit": ratelimit, - "extra_users": extra_users, + "extra_users": [u.to_string() for u in extra_users], } try: @@ -143,7 +143,7 @@ class ReplicationSendEventRestServlet(RestServlet): context = yield EventContext.deserialize(self.store, content["context"]) ratelimit = content["ratelimit"] - extra_users = 
content["extra_users"] + extra_users = [UserID.from_string(u) for u in content["extra_users"]] if requester.user: request.authenticated_entity = requester.user.to_string() From 0f942f68c106b9d0fb89d0eaef9fa942b6d003ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 11:31:11 +0000 Subject: [PATCH 299/348] Factor out _remote_reject_invite in RoomMember --- synapse/handlers/room_member.py | 50 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730d..6c8acfbf09 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -154,6 +154,30 @@ class RoomMemberHandler(object): ) yield user_joined_room(self.distributor, user, room_id) + @defer.inlineCallbacks + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + fed_handler = self.federation_handler + try: + ret = yield fed_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + target.to_string(), + ) + defer.returnValue(ret) + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + target.to_string(), room_id + ) + defer.returnValue({}) + @defer.inlineCallbacks def update_membership( self, @@ -328,28 +352,10 @@ class RoomMemberHandler(object): else: # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] - fed_handler = self.federation_handler - try: - ret = yield fed_handler.do_remotely_reject_invite( - remote_room_hosts, - room_id, - target.to_string(), - ) - defer.returnValue(ret) - except Exception as e: - # if we were unable to reject the exception, just mark - # it as rejected on our end and plough ahead. - # - # The 'except' clause is very broad, but we need to - # capture everything from DNS failures upwards - # - logger.warn("Failed to reject invite: %s", e) - - yield self.store.locally_reject_invite( - target.to_string(), room_id - ) - - defer.returnValue({}) + res = yield self._remote_reject_invite( + remote_room_hosts, room_id, target, + ) + defer.returnValue(res) res = yield self._local_membership_update( requester=requester, From f43b6d6d9b7e6f6a94ba3e1886cec6ca864fad43 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 11:29:17 +0000 Subject: [PATCH 300/348] Fix docstring types --- synapse/federation/federation_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 5b1914f2f4..90302c9537 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -581,7 +581,7 @@ class FederationHandlerRegistry(object): Args: edu_type (str): The type of the incoming EDU to register handler for - handler (Callable[str, dict]): A callable invoked on incoming EDU + handler (Callable[[str, dict]]): A callable invoked on incoming EDU of the given type. The arguments are the origin server name and the EDU contents. """ @@ -597,7 +597,7 @@ class FederationHandlerRegistry(object): Args: query_type (str): Category name of the query, which should match the string used by make_query. 
- handler (Callable[dict] -> Deferred[dict]): Invoked to handle + handler (Callable[[dict], Deferred[dict]]): Invoked to handle incoming queries of this type. The return will be yielded on and the result used as the response to the query request. """ From 8b3573a8b209c60b03d5ef7f4dfed9ccb9e9f7b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:04:38 +0000 Subject: [PATCH 301/348] Refactor get_or_register_3pid_guest --- synapse/handlers/register.py | 26 ++++++++++++++++++++++---- synapse/handlers/room_member.py | 10 +++------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 9021d4d57f..ed5939880a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -446,16 +446,34 @@ class RegistrationHandler(BaseHandler): return self.hs.get_auth_handler() @defer.inlineCallbacks - def guest_access_token_for(self, medium, address, inviter_user_id): + def get_or_register_3pid_guest(self, medium, address, inviter_user_id): + """Get a guest access token for a 3PID, creating a guest account if + one doesn't already exist. + + Args: + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. + """ access_token = yield self.store.get_3pid_guest_access_token(medium, address) if access_token: - defer.returnValue(access_token) + user_info = yield self.auth.get_user_by_access_token( + access_token + ) - _, access_token = yield self.register( + defer.returnValue((user_info["user"].to_string(), access_token)) + + user_id, access_token = yield self.register( generate_token=True, make_guest=True ) access_token = yield self.store.save_or_get_3pid_guest_access_token( medium, address, access_token, inviter_user_id ) - defer.returnValue(access_token) + + defer.returnValue((user_id, access_token)) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ed3b97730d..c3c720536e 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -735,20 +735,16 @@ class RoomMemberHandler(object): } if self.config.invite_3pid_guest: - registration_handler = self.registration_handler - guest_access_token = yield registration_handler.guest_access_token_for( + rh = self.registration_handler + guest_user_id, guest_access_token = yield rh.get_or_register_3pid_guest( medium=medium, address=address, inviter_user_id=inviter_user_id, ) - guest_user_info = yield self.auth.get_user_by_access_token( - guest_access_token - ) - invite_config.update({ "guest_access_token": guest_access_token, - "guest_user_id": guest_user_info["user"].to_string(), + "guest_user_id": guest_user_id, }) data = yield self.simple_http_client.post_urlencoded_get_json( From f5160d4a3e559ba23f3e6002a8f9172dff4b3d60 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 12:12:55 +0000 Subject: [PATCH 302/348] RoomMembershipRestServlet doesn't handle /forget Due to the order we register the REST handlers `/forget` was handled by the correct handler. 
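The sketch below (not part of the patch) illustrates why that ordering mattered. It models a first-match URL dispatcher in plain Python; the dispatcher, the servlet names passed to the calls, and the assumption that the more specific /forget servlet happens to be registered first are illustrative stand-ins, not Synapse's actual http_server registration code. With both regexes registered, a request ending in /forget is served by whichever matching pattern was registered first, so dropping forget from the membership pattern makes the routing independent of registration order.

# Toy first-match dispatcher (illustrative only, not Synapse's http_server).
import re

routes = []  # (compiled pattern, servlet name), kept in registration order

def register(pattern, servlet):
    routes.append((re.compile(pattern), servlet))

def dispatch(path):
    # The first registered pattern that matches the path wins.
    for pattern, servlet in routes:
        if pattern.match(path):
            return servlet
    return None

# Assume, for illustration, that the dedicated /forget servlet is
# registered before the generic membership servlet.
register(r"/rooms/(?P<room_id>[^/]*)/forget$", "RoomForgetRestServlet")
register(r"/rooms/(?P<room_id>[^/]*)/"
         r"(?P<membership_action>join|invite|leave|ban|unban|kick|forget)$",
         "RoomMembershipRestServlet")

assert dispatch("/rooms/!abc:example.com/forget") == "RoomForgetRestServlet"
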
--- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 9d745174c7..f8999d64d7 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -599,7 +599,7 @@ class RoomMembershipRestServlet(ClientV1RestServlet): def register(self, http_server): # /rooms/$roomid/[invite|join|leave] PATTERNS = ("/rooms/(?P[^/]*)/" - "(?Pjoin|invite|leave|ban|unban|kick|forget)") + "(?Pjoin|invite|leave|ban|unban|kick)") register_txn_path(self, PATTERNS, http_server) @defer.inlineCallbacks From ea3442c15c32ba98c407c71722cb80821d99d160 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:16:21 +0000 Subject: [PATCH 303/348] Add docstring --- synapse/handlers/room_member.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6c8acfbf09..da35e604d0 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -139,6 +139,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def remote_join(self, remote_room_hosts, room_id, user, content): + """Try and join a room that this server is not in + + Args: + remote_room_hosts (list[str]): List of servers that can be used + to join via. + room_id (str): Room that we are trying to join + user (UserID): User who is trying to join + content (dict): A dict that should be used as the content of the + join event. + + Returns: + Deferred + """ if len(remote_room_hosts) == 0: raise SynapseError(404, "No known servers") @@ -156,6 +169,19 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Attempt to reject an invite for a room this server is not in. If we + fail to do so we locally mark the invite as rejected. 
+ + Args: + remote_room_hosts (list[str]): List of servers to use to try and + reject invite + room_id (str) + target (UserID): The user rejecting the invite + + Returns: + Deferred[dict]: A dictionary to be returned to the client, may + include event_id etc, or nothing if we locally rejected + """ fed_handler = self.federation_handler try: ret = yield fed_handler.do_remotely_reject_invite( From cea462e2857d3af2007521f131adb46e4f5ce6fe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:22:21 +0000 Subject: [PATCH 304/348] s/replication_server/federation_server --- synapse/federation/transport/server.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_profile.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 04b83e691a..a66a6b0692 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -1190,7 +1190,7 @@ GROUP_ATTESTATION_SERVLET_CLASSES = ( def register_servlets(hs, resource, authenticator, ratelimiter): for servletclass in FEDERATION_SERVLET_CLASSES: servletclass( - handler=hs.get_replication_server(), + handler=hs.get_federation_server(), authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, diff --git a/synapse/server.py b/synapse/server.py index 894e9c2acf..802a793848 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -102,7 +102,7 @@ class HomeServer(object): 'http_client', 'db_pool', 'replication_client', - 'replication_server', + 'federation_server', 'handlers', 'v1auth', 'auth', @@ -202,7 +202,7 @@ class HomeServer(object): def build_replication_client(self): return FederationClient(self) - def build_replication_server(self): + def build_federation_server(self): return FederationServer(self) def build_handlers(self): diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index c690437682..f9f828471a 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -52,7 +52,7 @@ class ProfileTestCase(unittest.TestCase): handlers=None, resource_for_federation=Mock(), replication_client=self.mock_federation, - replication_server=Mock(), + federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ "send_message", From cb9f8e527c09315eea05955ec970154ea2fb9729 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:26:52 +0000 Subject: [PATCH 305/348] s/replication_client/federation_client/ --- synapse/app/homeserver.py | 2 +- synapse/handlers/device.py | 2 +- synapse/handlers/directory.py | 2 +- synapse/handlers/e2e_keys.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/handlers/room_list.py | 2 +- synapse/server.py | 4 ++-- tests/handlers/test_directory.py | 2 +- tests/handlers/test_e2e_keys.py | 2 +- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_profile.py | 2 +- tests/rest/client/v1/test_rooms.py | 16 ++++++++-------- tests/rest/client/v1/test_typing.py | 2 +- tests/storage/test_appservice.py | 10 +++++----- 17 files changed, 29 insertions(+), 29 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 503f461ab4..e477c7ced6 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -348,7 +348,7 @@ def setup(config_options): hs.get_state_handler().start_caching() 
hs.get_datastore().start_profiling() hs.get_datastore().start_doing_background_updates() - hs.get_replication_client().start_get_pdu_cache() + hs.get_federation_client().start_get_pdu_cache() register_memory_metrics(hs) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index fcf41630d6..40f3d24678 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -431,7 +431,7 @@ class DeviceListEduUpdater(object): def __init__(self, hs, device_handler): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index dfe04eb1c1..c5b6e75e03 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -36,7 +36,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "directory", self.on_directory_query ) diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 0ca8d036ee..31b1ece13e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) class E2eKeysHandler(object): def __init__(self, hs): self.store = hs.get_datastore() - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() self.device_handler = hs.get_device_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cfd4379160..080aca3d71 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,7 +68,7 @@ class FederationHandler(BaseHandler): self.hs = hs self.store = hs.get_datastore() - self.replication_layer = hs.get_replication_client() + self.replication_layer = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname self.keyring = hs.get_keyring() diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 0cfac60d74..cb710fe796 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -31,7 +31,7 @@ class ProfileHandler(BaseHandler): def __init__(self, hs): super(ProfileHandler, self).__init__(hs) - self.federation = hs.get_replication_client() + self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( "profile", self.on_profile_query ) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index f79bd8902f..5d81f59b44 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -409,7 +409,7 @@ class RoomListHandler(BaseHandler): def _get_remote_list_cached(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - repl_layer = self.hs.get_replication_client() + repl_layer = self.hs.get_federation_client() if search_filter: # We can't cache when asking for search return repl_layer.get_public_rooms( diff --git a/synapse/server.py b/synapse/server.py index 802a793848..43c6e0a6d6 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -101,7 +101,7 @@ class HomeServer(object): DEPENDENCIES = [ 'http_client', 'db_pool', - 
'replication_client', + 'federation_client', 'federation_server', 'handlers', 'v1auth', @@ -199,7 +199,7 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter - def build_replication_client(self): + def build_federation_client(self): return FederationClient(self) def build_federation_server(self): diff --git a/tests/handlers/test_directory.py b/tests/handlers/test_directory.py index b4f36b27a6..7e5332e272 100644 --- a/tests/handlers/test_directory.py +++ b/tests/handlers/test_directory.py @@ -47,7 +47,7 @@ class DirectoryTestCase(unittest.TestCase): hs = yield setup_test_homeserver( http_client=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_registry=self.mock_registry, ) hs.handlers = DirectoryHandlers(hs) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index fe73f2b96c..d1bd87b898 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -34,7 +34,7 @@ class E2eKeysHandlerTestCase(unittest.TestCase): def setUp(self): self.hs = yield utils.setup_test_homeserver( handlers=None, - replication_client=mock.Mock(), + federation_client=mock.Mock(), ) self.handler = synapse.handlers.e2e_keys.E2eKeysHandler(self.hs) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index f9f828471a..458296ee4c 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -51,7 +51,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, handlers=None, resource_for_federation=Mock(), - replication_client=self.mock_federation, + federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, ratelimiter=NonCallableMock(spec_set=[ diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index ceffdaad54..64e07a8c93 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,7 +31,7 @@ class BaseSlavedStoreTestCase(unittest.TestCase): self.hs = yield setup_test_homeserver( "blue", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index f04bf7dfde..2b89c0a3c7 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -114,7 +114,7 @@ class EventStreamPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index feddcf024e..deac7f100c 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -45,7 +45,7 @@ class ProfileTestCase(unittest.TestCase): http_client=None, resource_for_client=self.mock_resource, federation=Mock(), - replication_client=Mock(), + federation_client=Mock(), profile_handler=self.mock_handler ) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 2c0708b0d8..7e8966a1a8 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -46,7 +46,7 @@ class RoomPermissionsTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), 
ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -409,7 +409,7 @@ class RoomsMemberListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -493,7 +493,7 @@ class RoomsCreateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -582,7 +582,7 @@ class RoomTopicTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -697,7 +697,7 @@ class RoomMemberStateTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -829,7 +829,7 @@ class RoomMessagesTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() @@ -929,7 +929,7 @@ class RoomInitialSyncTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), @@ -1003,7 +1003,7 @@ class RoomMessageListTestCase(RestTestCase): hs = yield setup_test_homeserver( "red", http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.ratelimiter = hs.get_ratelimiter() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 62639e3adc..2ec4ecab5b 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(RestTestCase): "red", clock=self.clock, http_client=None, - replication_client=Mock(), + federation_client=Mock(), ratelimiter=NonCallableMock(spec_set=[ "send_message", ]), diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index cc0df7f662..c2e39a7288 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -42,7 +42,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.as_token = "token1" @@ -119,7 +119,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): hs = yield setup_test_homeserver( config=config, federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) self.db_pool = hs.get_db_pool() @@ -455,7 +455,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) ApplicationServiceStore(None, hs) @@ -473,7 +473,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with 
self.assertRaises(ConfigError) as cm: @@ -497,7 +497,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): config=config, datastore=Mock(), federation_sender=Mock(), - replication_client=Mock(), + federation_client=Mock(), ) with self.assertRaises(ConfigError) as cm: From b78717b87ba3fc248838db522f007c13d0cd8c76 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 13:49:13 +0000 Subject: [PATCH 306/348] Split RoomMemberHandler into base and master class The intention here is to split the class into the bits that can be done on workers and the bits that have to be done on the master. In future there will also be a class that can be run on the worker, which will delegate work to the master when necessary. --- synapse/handlers/room_member.py | 231 +++++++++++++++++++------------- synapse/server.py | 6 +- 2 files changed, 139 insertions(+), 98 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6ee8420d1f..ce83d56451 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -14,9 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import logging - from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from twisted.internet import defer @@ -31,6 +28,10 @@ from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room +import abc +import logging + + logger = logging.getLogger(__name__) id_server_scheme = "https://" @@ -42,6 +43,8 @@ class RoomMemberHandler(object): # API that takes ID strings and returns pagination chunks. These concerns # ought to be separated out a lot better. + __metaclass__ = abc.ABCMeta + def __init__(self, hs): self.hs = hs self.store = hs.get_datastore() @@ -62,9 +65,56 @@ class RoomMemberHandler(object): self.clock = hs.get_clock() self.spam_checker = hs.get_spam_checker() - self.distributor = hs.get_distributor() - self.distributor.declare("user_joined_room") - self.distributor.declare("user_left_room") + @abc.abstractmethod + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Try and join a room that this server is not in + + Args: + remote_room_hosts (list[str]): List of servers that can be used + to join via. + room_id (str): Room that we are trying to join + user (UserID): User who is trying to join + content (dict): A dict that should be used as the content of the + join event. + + Returns: + Deferred + """ + raise NotImplementedError() + + @abc.abstractmethod + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Attempt to reject an invite for a room this server is not in. If we + fail to do so we locally mark the invite as rejected. + + Args: + remote_room_hosts (list[str]): List of servers to use to try and + reject invite + room_id (str) + target (UserID): The user rejecting the invite + + Returns: + Deferred[dict]: A dictionary to be returned to the client, may + include event_id etc, or nothing if we locally rejected + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Get a guest access token for a 3PID, creating a guest account if + one doesn't already exist. 
+ + Args: + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. + """ + raise NotImplementedError() @defer.inlineCallbacks def _local_membership_update( @@ -137,73 +187,6 @@ class RoomMemberHandler(object): defer.returnValue(event) - @defer.inlineCallbacks - def _remote_join(self, remote_room_hosts, room_id, user, content): - """Try and join a room that this server is not in - - Args: - remote_room_hosts (list[str]): List of servers that can be used - to join via. - room_id (str): Room that we are trying to join - user (UserID): User who is trying to join - content (dict): A dict that should be used as the content of the - join event. - - Returns: - Deferred - """ - if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") - - # We don't do an auth check if we are doing an invite - # join dance for now, since we're kinda implicitly checking - # that we are allowed to join when we decide whether or not we - # need to do the invite/join dance. - yield self.federation_handler.do_invite_join( - remote_room_hosts, - room_id, - user.to_string(), - content, - ) - yield user_joined_room(self.distributor, user, room_id) - - @defer.inlineCallbacks - def _remote_reject_invite(self, remote_room_hosts, room_id, target): - """Attempt to reject an invite for a room this server is not in. If we - fail to do so we locally mark the invite as rejected. - - Args: - remote_room_hosts (list[str]): List of servers to use to try and - reject invite - room_id (str) - target (UserID): The user rejecting the invite - - Returns: - Deferred[dict]: A dictionary to be returned to the client, may - include event_id etc, or nothing if we locally rejected - """ - fed_handler = self.federation_handler - try: - ret = yield fed_handler.do_remotely_reject_invite( - remote_room_hosts, - room_id, - target.to_string(), - ) - defer.returnValue(ret) - except Exception as e: - # if we were unable to reject the exception, just mark - # it as rejected on our end and plough ahead. - # - # The 'except' clause is very broad, but we need to - # capture everything from DNS failures upwards - # - logger.warn("Failed to reject invite: %s", e) - - yield self.store.locally_reject_invite( - target.to_string(), room_id - ) - defer.returnValue({}) - @defer.inlineCallbacks def update_membership( self, @@ -673,6 +656,7 @@ class RoomMemberHandler(object): token, public_keys, fallback_public_key, display_name = ( yield self._ask_id_server_for_third_party_invite( + requester=requester, id_server=id_server, medium=medium, address=address, @@ -709,6 +693,7 @@ class RoomMemberHandler(object): @defer.inlineCallbacks def _ask_id_server_for_third_party_invite( self, + requester, id_server, medium, address, @@ -725,6 +710,7 @@ class RoomMemberHandler(object): Asks an identity server for a third party invite. Args: + requester (Requester) id_server (str): hostname + optional port for the identity server. medium (str): The literal string "email". address (str): The third party address being invited. 
@@ -767,8 +753,8 @@ class RoomMemberHandler(object): } if self.config.invite_3pid_guest: - rh = self.registration_handler - guest_user_id, guest_access_token = yield rh.get_or_register_3pid_guest( + guest_access_token, guest_user_id = yield self.get_or_register_3pid_guest( + requester=requester, medium=medium, address=address, inviter_user_id=inviter_user_id, @@ -801,27 +787,6 @@ class RoomMemberHandler(object): display_name = data["display_name"] defer.returnValue((token, public_keys, fallback_public_key, display_name)) - @defer.inlineCallbacks - def forget(self, user, room_id): - user_id = user.to_string() - - member = yield self.state_handler.get_current_state( - room_id=room_id, - event_type=EventTypes.Member, - state_key=user_id - ) - membership = member.membership if member else None - - if membership is not None and membership not in [ - Membership.LEAVE, Membership.BAN - ]: - raise SynapseError(400, "User %s in room %s" % ( - user_id, room_id - )) - - if membership: - yield self.store.forget(user_id, room_id) - @defer.inlineCallbacks def _is_host_in_room(self, current_state_ids): # Have we just created the room, and is this about to be the very @@ -843,3 +808,77 @@ class RoomMemberHandler(object): defer.returnValue(True) defer.returnValue(False) + + +class RoomMemberMasterHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + # We don't do an auth check if we are doing an invite + # join dance for now, since we're kinda implicitly checking + # that we are allowed to join when we decide whether or not we + # need to do the invite/join dance. + yield self.federation_handler.do_invite_join( + remote_room_hosts, + room_id, + user.to_string(), + content, + ) + yield user_joined_room(self.distributor, user, room_id) + + @defer.inlineCallbacks + def _remote_reject_invite(self, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + fed_handler = self.federation_handler + try: + ret = yield fed_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + target.to_string(), + ) + defer.returnValue(ret) + except Exception as e: + # if we were unable to reject the exception, just mark + # it as rejected on our end and plough ahead. 
+ # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + target.to_string(), room_id + ) + defer.returnValue({}) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + rg = self.registration_handler + return rg.get_or_register_3pid_guest(medium, address, inviter_user_id) + + @defer.inlineCallbacks + def forget(self, user, room_id): + user_id = user.to_string() + + member = yield self.state_handler.get_current_state( + room_id=room_id, + event_type=EventTypes.Member, + state_key=user_id + ) + membership = member.membership if member else None + + if membership is not None and membership not in [ + Membership.LEAVE, Membership.BAN + ]: + raise SynapseError(400, "User %s in room %s" % ( + user_id, room_id + )) + + if membership: + yield self.store.forget(user_id, room_id) diff --git a/synapse/server.py b/synapse/server.py index 1bc8d6f702..3d47e2793b 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -46,7 +46,7 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import RoomMemberHandler +from synapse.handlers.room_member import RoomMemberMasterHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -387,7 +387,9 @@ class HomeServer(object): return SpamChecker(self) def build_room_member_handler(self): - return RoomMemberHandler(self) + if self.config.worker_app: + return Exception("Can't use RoomMemberHandler on workers") + return RoomMemberMasterHandler(self) def build_federation_registry(self): return FederationHandlerRegistry() From 82f16faa78864a23653ca952072a6f0e906f3367 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:00:26 +0000 Subject: [PATCH 307/348] Move user_*_room distributor stuff to master class I added yields when calling user_left_room, but they shouldn't matter on the master process as they always return None anyway. --- synapse/handlers/room_member.py | 55 ++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ce83d56451..669210b73d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -116,6 +116,34 @@ class RoomMemberHandler(object): """ raise NotImplementedError() + @abc.abstractmethod + def _user_joined_room(self, target, room_id): + """Notifies distributor on master process that the user has joined the + room. + + Args: + target (UserID) + room_id (str) + + Returns: + Deferred|None + """ + raise NotImplementedError() + + @abc.abstractmethod + def _user_left_room(self, target, room_id): + """Notifies distributor on master process that the user has left the + room. 
+ + Args: + target (UserID) + room_id (str) + + Returns: + Deferred|None + """ + raise NotImplementedError() + @defer.inlineCallbacks def _local_membership_update( self, requester, target, room_id, membership, @@ -178,12 +206,12 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: - yield user_joined_room(self.distributor, target, room_id) + yield self._user_joined_room(target, room_id) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) if prev_member_event.membership == Membership.JOIN: - user_left_room(self.distributor, target, room_id) + yield self._user_left_room(target, room_id) defer.returnValue(event) @@ -460,12 +488,12 @@ class RoomMemberHandler(object): prev_member_event = yield self.store.get_event(prev_member_event_id) newly_joined = prev_member_event.membership != Membership.JOIN if newly_joined: - yield user_joined_room(self.distributor, target_user, room_id) + yield self._user_joined_room(target_user, room_id) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) if prev_member_event.membership == Membership.JOIN: - user_left_room(self.distributor, target_user, room_id) + yield self._user_left_room(target_user, room_id) @defer.inlineCallbacks def _can_guest_join(self, current_state_ids): @@ -811,6 +839,13 @@ class RoomMemberHandler(object): class RoomMemberMasterHandler(RoomMemberHandler): + def __init__(self, hs): + super(RoomMemberMasterHandler, self).__init__(hs) + + self.distributor = hs.get_distributor() + self.distributor.declare("user_joined_room") + self.distributor.declare("user_left_room") + @defer.inlineCallbacks def _remote_join(self, remote_room_hosts, room_id, user, content): """Implements RoomMemberHandler._remote_join @@ -828,7 +863,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): user.to_string(), content, ) - yield user_joined_room(self.distributor, user, room_id) + yield self._user_joined_room(user, room_id) @defer.inlineCallbacks def _remote_reject_invite(self, remote_room_hosts, room_id, target): @@ -862,6 +897,16 @@ class RoomMemberMasterHandler(RoomMemberHandler): rg = self.registration_handler return rg.get_or_register_3pid_guest(medium, address, inviter_user_id) + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return user_joined_room(self.distributor, target, room_id) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return user_left_room(self.distributor, target, room_id) + @defer.inlineCallbacks def forget(self, user, room_id): user_id = user.to_string() From 16adb11cc0aa255f0be98d6446d629e18a0dcefe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:57:07 +0000 Subject: [PATCH 308/348] Correct import order --- synapse/handlers/room_member.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 669210b73d..78b5feee4f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
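The two room-member patches above reduce to a small, reusable shape: the base class keeps the shared flow and declares abstract hooks, and only the master implementation talks to the distributor. A minimal, self-contained sketch of that shape (the names below are illustrative stand-ins, not the actual Synapse classes):

    import abc


    class MemberHandlerBase(object):
        # Shared flow lives on the base; per-process behaviour is abstract,
        # mirroring the __metaclass__ = abc.ABCMeta idiom used in the patch.
        __metaclass__ = abc.ABCMeta

        @abc.abstractmethod
        def _user_joined_room(self, user_id, room_id):
            """Tell whoever needs to know that user_id has joined room_id."""
            raise NotImplementedError()

        def handle_join(self, user_id, room_id):
            # Common checks and bookkeeping would happen here first.
            return self._user_joined_room(user_id, room_id)


    class MasterMemberHandler(MemberHandlerBase):
        # Master-only implementation: fires local observers directly
        # (a stand-in for the distributor).
        def __init__(self, observers):
            self.observers = list(observers)

        def _user_joined_room(self, user_id, room_id):
            for observer in self.observers:
                observer(user_id, room_id)


    def _announce(user_id, room_id):
        print("%s joined %s" % (user_id, room_id))


    if __name__ == "__main__":
        MasterMemberHandler([_announce]).handle_join(
            "@alice:example.com", "!abc:example.com",
        )

A worker-side subclass would implement the same hook by asking the master to do the notification instead, which is what the later RoomMemberWorkerHandler patches add.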
+import abc +import logging + from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from twisted.internet import defer @@ -28,9 +31,6 @@ from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room -import abc -import logging - logger = logging.getLogger(__name__) From 6dbebef1415fd4d0da29ebf6b90a16b9cc877b96 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:15:32 +0000 Subject: [PATCH 309/348] Add missing param to docstrings --- synapse/handlers/room_member.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 78b5feee4f..790e11c57c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -70,6 +70,7 @@ class RoomMemberHandler(object): """Try and join a room that this server is not in Args: + requester (Requester) remote_room_hosts (list[str]): List of servers that can be used to join via. room_id (str): Room that we are trying to join @@ -88,6 +89,7 @@ class RoomMemberHandler(object): fail to do so we locally mark the invite as rejected. Args: + requester (Requester) remote_room_hosts (list[str]): List of servers to use to try and reject invite room_id (str) @@ -105,6 +107,7 @@ class RoomMemberHandler(object): one doesn't already exist. Args: + requester (Requester) medium (str) address (str) inviter_user_id (str): The user ID who is trying to invite the From d45a1148245839c5f9776fee6d463ed69981d729 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:24:34 +0000 Subject: [PATCH 310/348] Raise, don't return, exception --- synapse/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/server.py b/synapse/server.py index 3d47e2793b..6ffe900b74 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -388,7 +388,7 @@ class HomeServer(object): def build_room_member_handler(self): if self.config.worker_app: - return Exception("Can't use RoomMemberHandler on workers") + raise Exception("Can't use RoomMemberHandler on workers") return RoomMemberMasterHandler(self) def build_federation_registry(self): From 3518d0ea8fc4cf9247790fde587e37e5d129ddb0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:36:50 +0000 Subject: [PATCH 311/348] Split up ProfileStore --- synapse/replication/slave/storage/profile.py | 21 ++++++++ synapse/storage/profile.py | 50 ++++++++++---------- 2 files changed, 47 insertions(+), 24 deletions(-) create mode 100644 synapse/replication/slave/storage/profile.py diff --git a/synapse/replication/slave/storage/profile.py b/synapse/replication/slave/storage/profile.py new file mode 100644 index 0000000000..46c28d4171 --- /dev/null +++ b/synapse/replication/slave/storage/profile.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.storage.profile import ProfileWorkerStore + + +class SlavedProfileStore(ProfileWorkerStore, BaseSlavedStore): + pass diff --git a/synapse/storage/profile.py b/synapse/storage/profile.py index ec02e73bc2..8612bd5ecc 100644 --- a/synapse/storage/profile.py +++ b/synapse/storage/profile.py @@ -21,14 +21,7 @@ from synapse.api.errors import StoreError from ._base import SQLBaseStore -class ProfileStore(SQLBaseStore): - def create_profile(self, user_localpart): - return self._simple_insert( - table="profiles", - values={"user_id": user_localpart}, - desc="create_profile", - ) - +class ProfileWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_profileinfo(self, user_localpart): try: @@ -61,14 +54,6 @@ class ProfileStore(SQLBaseStore): desc="get_profile_displayname", ) - def set_profile_displayname(self, user_localpart, new_displayname): - return self._simple_update_one( - table="profiles", - keyvalues={"user_id": user_localpart}, - updatevalues={"displayname": new_displayname}, - desc="set_profile_displayname", - ) - def get_profile_avatar_url(self, user_localpart): return self._simple_select_one_onecol( table="profiles", @@ -77,14 +62,6 @@ class ProfileStore(SQLBaseStore): desc="get_profile_avatar_url", ) - def set_profile_avatar_url(self, user_localpart, new_avatar_url): - return self._simple_update_one( - table="profiles", - keyvalues={"user_id": user_localpart}, - updatevalues={"avatar_url": new_avatar_url}, - desc="set_profile_avatar_url", - ) - def get_from_remote_profile_cache(self, user_id): return self._simple_select_one( table="remote_profile_cache", @@ -94,6 +71,31 @@ class ProfileStore(SQLBaseStore): desc="get_from_remote_profile_cache", ) + +class ProfileStore(ProfileWorkerStore): + def create_profile(self, user_localpart): + return self._simple_insert( + table="profiles", + values={"user_id": user_localpart}, + desc="create_profile", + ) + + def set_profile_displayname(self, user_localpart, new_displayname): + return self._simple_update_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + updatevalues={"displayname": new_displayname}, + desc="set_profile_displayname", + ) + + def set_profile_avatar_url(self, user_localpart, new_avatar_url): + return self._simple_update_one( + table="profiles", + keyvalues={"user_id": user_localpart}, + updatevalues={"avatar_url": new_avatar_url}, + desc="set_profile_avatar_url", + ) + def add_remote_profile_cache(self, user_id, displayname, avatar_url): """Ensure we are caching the remote user's profiles. 
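The ProfileStore split above applies the same worker/master idea to storage: the read-only queries move into a ProfileWorkerStore base that both the master store and the slaved store share, while the write paths stay on the master-only subclass. A rough, self-contained sketch of the inheritance shape, with a plain dict standing in for the database and illustrative names:

    class ProfileWorkerStoreSketch(object):
        # Read-only accessors: safe to call from any process.
        def __init__(self, profiles=None):
            self._profiles = profiles if profiles is not None else {}

        def get_profile_displayname(self, user_localpart):
            return self._profiles.get(user_localpart)


    class ProfileStoreSketch(ProfileWorkerStoreSketch):
        # Master-only store: adds the write paths on top of the reads.
        def set_profile_displayname(self, user_localpart, new_displayname):
            self._profiles[user_localpart] = new_displayname


    class SlavedProfileStoreSketch(ProfileWorkerStoreSketch):
        # What a worker uses: the reads and nothing that can write.
        pass


    if __name__ == "__main__":
        store = ProfileStoreSketch()
        store.set_profile_displayname("alice", "Alice")
        print(store.get_profile_displayname("alice"))

The real SlavedProfileStore also mixes in BaseSlavedStore so that it takes part in replication and cache invalidation; that machinery is omitted from the sketch.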
From df8ff682a72362acde7d99f7235ad4d4f87817e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:38:21 +0000 Subject: [PATCH 312/348] Only update remote profile cache on master --- synapse/handlers/profile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index cb710fe796..3465a787ab 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -38,7 +38,10 @@ class ProfileHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() - self.clock.looping_call(self._update_remote_profile_cache, self.PROFILE_UPDATE_MS) + if hs.config.worker_app is None: + self.clock.looping_call( + self._update_remote_profile_cache, self.PROFILE_UPDATE_MS, + ) @defer.inlineCallbacks def get_profile(self, user_id): From 350331d4664467358469e7d6161f9747707239ba Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 17:50:39 +0000 Subject: [PATCH 313/348] _remote_join and co take a requester --- synapse/handlers/room_member.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1d7e6997b9..9977be8831 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -370,7 +370,7 @@ class RoomMemberHandler(object): content["kind"] = "guest" ret = yield self._remote_join( - remote_room_hosts, room_id, target, content + requester, remote_room_hosts, room_id, target, content ) defer.returnValue(ret) @@ -392,7 +392,7 @@ class RoomMemberHandler(object): # send the rejection to the inviter's HS. remote_room_hosts = remote_room_hosts + [inviter.domain] res = yield self._remote_reject_invite( - remote_room_hosts, room_id, target, + requester, remote_room_hosts, room_id, target, ) defer.returnValue(res) @@ -849,7 +849,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): self.distributor.declare("user_left_room") @defer.inlineCallbacks - def _remote_join(self, remote_room_hosts, room_id, user, content): + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): """Implements RoomMemberHandler._remote_join """ if len(remote_room_hosts) == 0: @@ -868,7 +868,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): yield self._user_joined_room(user, room_id) @defer.inlineCallbacks - def _remote_reject_invite(self, remote_room_hosts, room_id, target): + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): """Implements RoomMemberHandler._remote_reject_invite """ fed_handler = self.federation_handler From b27320b5503d38e6bd05375b272c1a1d18656ea2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 13 Mar 2018 16:32:37 +0000 Subject: [PATCH 314/348] Implement RoomMemberWorkerHandler --- synapse/handlers/room_member.py | 78 ++++++ synapse/replication/http/__init__.py | 2 + synapse/replication/http/membership.py | 334 +++++++++++++++++++++++++ synapse/server.py | 6 +- 4 files changed, 418 insertions(+), 2 deletions(-) create mode 100644 synapse/replication/http/membership.py diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1d7e6997b9..3c5751d66c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -27,6 +27,10 @@ from synapse.api.constants import ( EventTypes, Membership, ) from synapse.api.errors import AuthError, SynapseError, Codes +from synapse.replication.http.membership import ( + remote_join, remote_reject_invite, get_or_register_3pid_guest, + 
notify_user_membership_change, +) from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room @@ -929,3 +933,77 @@ class RoomMemberMasterHandler(RoomMemberHandler): if membership: yield self.store.forget(user_id, room_id) + + +class RoomMemberWorkerHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + ret = yield remote_join( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=user.to_string(), + content=content, + ) + + yield self._user_joined_room(user, room_id) + + defer.returnValue(ret) + + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + return remote_reject_invite( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=target.to_string(), + ) + + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="join", + ) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="left", + ) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + return get_or_register_3pid_guest( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + medium=medium, + address=address, + inviter_user_id=inviter_user_id, + ) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index b378b41646..7a148301de 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -15,6 +15,7 @@ import send_event +import membership from synapse.http.server import JsonResource @@ -29,3 +30,4 @@ class ReplicationRestResource(JsonResource): def register_servlets(self, hs): send_event.register_servlets(hs, self) + membership.register_servlets(hs, self) diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py new file mode 100644 index 0000000000..df16c3b2b4 --- /dev/null +++ b/synapse/replication/http/membership.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.errors import SynapseError, MatrixCodeMessageException +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.types import Requester, UserID +from synapse.util.distributor import user_left_room, user_joined_room + +import logging +import re + +logger = logging.getLogger(__name__) + + +@defer.inlineCallbacks +def remote_join(client, host, port, requester, remote_room_hosts, + room_id, user_id, content): + """Ask the master to do a remote join for the given user to the given room + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + remote_room_hosts (list[str]): Servers to try and join via + room_id (str) + user_id (str) + content (dict): The event content to use for the join event + + Returns: + Deferred + """ + uri = "http://%s:%s/_synapse/replication/remote_join" % (host, port) + + payload = { + "requester": requester.serialize(), + "remote_room_hosts": remote_room_hosts, + "room_id": room_id, + "user_id": user_id, + "content": content, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def remote_reject_invite(client, host, port, requester, remote_room_hosts, + room_id, user_id): + """Ask master to reject the invite for the user and room. + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + remote_room_hosts (list[str]): Servers to try and reject via + room_id (str) + user_id (str) + + Returns: + Deferred + """ + uri = "http://%s:%s/_synapse/replication/remote_reject_invite" % (host, port) + + payload = { + "requester": requester.serialize(), + "remote_room_hosts": remote_room_hosts, + "room_id": room_id, + "user_id": user_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def get_or_register_3pid_guest(client, host, port, requester, + medium, address, inviter_user_id): + """Ask the master to get/create a guest account for given 3PID. + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication + requester (Requester) + medium (str) + address (str) + inviter_user_id (str): The user ID who is trying to invite the + 3PID + + Returns: + Deferred[(str, str)]: A 2-tuple of `(user_id, access_token)` of the + 3PID guest account. 
+ """ + + uri = "http://%s:%s/_synapse/replication/get_or_register_3pid_guest" % (host, port) + + payload = { + "requester": requester.serialize(), + "medium": medium, + "address": address, + "inviter_user_id": inviter_user_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +@defer.inlineCallbacks +def notify_user_membership_change(client, host, port, user_id, room_id, change): + """Notify master that a user has joined or left the room + + Args: + client (SimpleHttpClient) + host (str): host of master + port (int): port on master listening for HTTP replication. + user_id (str) + room_id (str) + change (str): Either "join" or "left" + + Returns: + Deferred + """ + assert change in ("join", "left") + + uri = "http://%s:%s/_synapse/replication/user_%s_room" % (host, port, change) + + payload = { + "user_id": user_id, + "room_id": room_id, + } + + try: + result = yield client.post_json_get_json(uri, payload) + except MatrixCodeMessageException as e: + # We convert to SynapseError as we know that it was a SynapseError + # on the master process that we should send to the client. (And + # importantly, not stack traces everywhere) + raise SynapseError(e.code, e.msg, e.errcode) + defer.returnValue(result) + + +class ReplicationRemoteJoinRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/remote_join$")] + + def __init__(self, hs): + super(ReplicationRemoteJoinRestServlet, self).__init__() + + self.federation_handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + remote_room_hosts = content["remote_room_hosts"] + room_id = content["room_id"] + user_id = content["user_id"] + event_content = content["content"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "remote_join: %s into room: %s", + user_id, room_id, + ) + + yield self.federation_handler.do_invite_join( + remote_room_hosts, + room_id, + user_id, + event_content, + ) + + defer.returnValue((200, {})) + + +class ReplicationRemoteRejectInviteRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/remote_reject_invite$")] + + def __init__(self, hs): + super(ReplicationRemoteRejectInviteRestServlet, self).__init__() + + self.federation_handler = hs.get_handlers().federation_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + remote_room_hosts = content["remote_room_hosts"] + room_id = content["room_id"] + user_id = content["user_id"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info( + "remote_reject_invite: %s out of room: %s", + user_id, room_id, + ) + + try: + event = yield self.federation_handler.do_remotely_reject_invite( + remote_room_hosts, + room_id, + user_id, + ) + ret = event.get_pdu_json() + except Exception as e: + # if we were unable to 
reject the exception, just mark + # it as rejected on our end and plough ahead. + # + # The 'except' clause is very broad, but we need to + # capture everything from DNS failures upwards + # + logger.warn("Failed to reject invite: %s", e) + + yield self.store.locally_reject_invite( + user_id, room_id + ) + ret = {} + + defer.returnValue((200, ret)) + + +class ReplicationRegister3PIDGuestRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/get_or_register_3pid_guest$")] + + def __init__(self, hs): + super(ReplicationRegister3PIDGuestRestServlet, self).__init__() + + self.registeration_handler = hs.get_handlers().registration_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + + @defer.inlineCallbacks + def on_POST(self, request): + content = parse_json_object_from_request(request) + + medium = content["medium"] + address = content["address"] + inviter_user_id = content["inviter_user_id"] + + requester = Requester.deserialize(self.store, content["requester"]) + + if requester.user: + request.authenticated_entity = requester.user.to_string() + + logger.info("get_or_register_3pid_guest: %r", content) + + ret = yield self.registeration_handler.get_or_register_3pid_guest( + medium, address, inviter_user_id, + ) + + defer.returnValue((200, ret)) + + +class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): + PATTERNS = [re.compile("^/_synapse/replication/user_(?P<change>join|left)_room$")] + + def __init__(self, hs): + super(ReplicationUserJoinedLeftRoomRestServlet, self).__init__() + + self.registeration_handler = hs.get_handlers().registration_handler + self.store = hs.get_datastore() + self.clock = hs.get_clock() + self.distributor = hs.get_distributor() + + def on_POST(self, request, change): + content = parse_json_object_from_request(request) + + user_id = content["user_id"] + room_id = content["room_id"] + + logger.info("user membership change: %s in %s", user_id, room_id) + + user = UserID.from_string(user_id) + + if change == "join": + user_joined_room(self.distributor, user, room_id) + elif change == "left": + user_left_room(self.distributor, user, room_id) + else: + raise Exception("Unrecognized change: %r", change) + + return (200, {}) + + +def register_servlets(hs, http_server): + ReplicationRemoteJoinRestServlet(hs).register(http_server) + ReplicationRemoteRejectInviteRestServlet(hs).register(http_server) + ReplicationRegister3PIDGuestRestServlet(hs).register(http_server) + ReplicationUserJoinedLeftRoomRestServlet(hs).register(http_server) diff --git a/synapse/server.py b/synapse/server.py index 763f0f5a68..c48c96727e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -47,7 +47,9 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import RoomMemberMasterHandler +from synapse.handlers.room_member import ( + RoomMemberMasterHandler, RoomMemberWorkerHandler, +) from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler @@ -393,7 +395,7 @@ class HomeServer(object): def build_room_member_handler(self): if self.config.worker_app: - raise Exception("Can't use RoomMemberHandler on workers") + return RoomMemberWorkerHandler(self) return RoomMemberMasterHandler(self) def build_federation_registry(self): From a08726fc4262c6d8e9c69db3df322317cb87f6c2 Mon Sep
17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:20:54 +0000 Subject: [PATCH 315/348] Add is_blocked to worker store --- synapse/storage/room.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 7f2c08d7a6..34ed84ea22 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -157,6 +157,18 @@ class RoomWorkerStore(SQLBaseStore): "get_public_room_changes", get_public_room_changes_txn ) + @cached(max_entries=10000) + def is_room_blocked(self, room_id): + return self._simple_select_one_onecol( + table="blocked_rooms", + keyvalues={ + "room_id": room_id, + }, + retcol="1", + allow_none=True, + desc="is_room_blocked", + ) + class RoomStore(RoomWorkerStore, SearchStore): @@ -485,18 +497,6 @@ class RoomStore(RoomWorkerStore, SearchStore): else: defer.returnValue(None) - @cached(max_entries=10000) - def is_room_blocked(self, room_id): - return self._simple_select_one_onecol( - table="blocked_rooms", - keyvalues={ - "room_id": room_id, - }, - retcol="1", - allow_none=True, - desc="is_room_blocked", - ) - @defer.inlineCallbacks def block_room(self, room_id, user_id): yield self._simple_insert( @@ -507,7 +507,11 @@ class RoomStore(RoomWorkerStore, SearchStore): }, desc="block_room", ) - self.is_room_blocked.invalidate((room_id,)) + yield self.runInteraction( + "block_room_invalidation", + self._invalidate_cache_and_stream, + self.is_room_blocked, (room_id,), + ) def get_media_mxcs_in_room(self, room_id): """Retrieves all the local and remote media MXC URIs in a given room From d144ed6ffb578db83bff0f36826d25d9f09bcd30 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:36:04 +0000 Subject: [PATCH 316/348] fix bug #2926 (loading all state for a given type from the DB if the state_key is None) (#2990) Fixes a regression that had crept in where the caching layer upholds requests for loading state which is filtered by type (but not by state_key), but the DB layer itself would interpret a missing state_key as a request to filter by null state_key rather than returning all state_keys. --- synapse/storage/state.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..ffa4246031 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else ( + "AND type = ?", + (etype,) ) for etype, state_key in types ] @@ -259,10 +262,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] + where_clauses = [] + wildcard_types = False if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + for typ in types: + if typ[1] is None: + where_clauses.append("(type = ?)") + where_args.extend(typ[0]) + wildcard_types = True + else: + where_clauses.append("(type = ? 
AND state_key = ?)") + where_args.extend([typ[0], typ[1]]) + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -279,7 +291,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -292,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries in the (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( From 3f0f06cb31d0293e75943e804b3e1ed8fe58ebed Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 11:41:45 +0000 Subject: [PATCH 317/348] Split RoomMemberWorkerHandler to separate file --- synapse/handlers/room_member.py | 78 ------------------- synapse/handlers/room_member_worker.py | 102 +++++++++++++++++++++++++ synapse/server.py | 5 +- 3 files changed, 104 insertions(+), 81 deletions(-) create mode 100644 synapse/handlers/room_member_worker.py diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 3c5751d66c..1d7e6997b9 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -27,10 +27,6 @@ from synapse.api.constants import ( EventTypes, Membership, ) from synapse.api.errors import AuthError, SynapseError, Codes -from synapse.replication.http.membership import ( - remote_join, remote_reject_invite, get_or_register_3pid_guest, - notify_user_membership_change, -) from synapse.types import UserID, RoomID from synapse.util.async import Linearizer from synapse.util.distributor import user_left_room, user_joined_room @@ -933,77 +929,3 @@ class RoomMemberMasterHandler(RoomMemberHandler): if membership: yield self.store.forget(user_id, room_id) - - -class RoomMemberWorkerHandler(RoomMemberHandler): - @defer.inlineCallbacks - def _remote_join(self, requester, remote_room_hosts, room_id, user, content): - """Implements RoomMemberHandler._remote_join - """ - if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") - - ret = yield remote_join( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - remote_room_hosts=remote_room_hosts, - room_id=room_id, - user_id=user.to_string(), - content=content, - ) - - yield self._user_joined_room(user, room_id) - - defer.returnValue(ret) - - def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): - """Implements RoomMemberHandler._remote_reject_invite - """ - return remote_reject_invite( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - remote_room_hosts=remote_room_hosts, - room_id=room_id, - user_id=target.to_string(), - ) - - def _user_joined_room(self, target, 
room_id): - """Implements RoomMemberHandler._user_joined_room - """ - return notify_user_membership_change( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - user_id=target.to_string(), - room_id=room_id, - change="join", - ) - - def _user_left_room(self, target, room_id): - """Implements RoomMemberHandler._user_left_room - """ - return notify_user_membership_change( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - user_id=target.to_string(), - room_id=room_id, - change="left", - ) - - def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): - """Implements RoomMemberHandler.get_or_register_3pid_guest - """ - return get_or_register_3pid_guest( - self.simple_http_client, - host=self.config.worker_replication_host, - port=self.config.worker_replication_http_port, - requester=requester, - medium=medium, - address=address, - inviter_user_id=inviter_user_id, - ) diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py new file mode 100644 index 0000000000..75a881d990 --- /dev/null +++ b/synapse/handlers/room_member_worker.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
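The worker handler defined in this new file implements each abstract hook by calling the master over plain HTTP, using the replication endpoints registered earlier and the master's location from the worker config (worker_replication_host and worker_replication_http_port). Stripped of Twisted and Synapse specifics, the calling convention looks roughly like the sketch below; post_json stands in for whatever HTTP client is available, and the literal host and port are invented for the example:

    def notify_master_user_joined(post_json, host, port, user_id, room_id):
        # Build the replication URL the master-side servlet listens on and
        # send the details across as a JSON body.
        uri = "http://%s:%s/_synapse/replication/user_joined_room" % (host, port)
        payload = {"user_id": user_id, "room_id": room_id}
        return post_json(uri, payload)


    def _print_post_json(uri, payload):
        # Trivial stand-in so the sketch runs without a master process.
        print("POST %s -> %r" % (uri, payload))
        return {}


    if __name__ == "__main__":
        notify_master_user_joined(
            _print_post_json, "127.0.0.1", 9093,
            "@alice:example.com", "!abc:example.com",
        )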
+ +import logging + +from twisted.internet import defer + +from synapse.api.errors import SynapseError +from synapse.handlers.room_member import RoomMemberHandler +from synapse.replication.http.membership import ( + remote_join, remote_reject_invite, get_or_register_3pid_guest, + notify_user_membership_change, +) + + +logger = logging.getLogger(__name__) + + +class RoomMemberWorkerHandler(RoomMemberHandler): + @defer.inlineCallbacks + def _remote_join(self, requester, remote_room_hosts, room_id, user, content): + """Implements RoomMemberHandler._remote_join + """ + if len(remote_room_hosts) == 0: + raise SynapseError(404, "No known servers") + + ret = yield remote_join( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=user.to_string(), + content=content, + ) + + yield self._user_joined_room(user, room_id) + + defer.returnValue(ret) + + def _remote_reject_invite(self, requester, remote_room_hosts, room_id, target): + """Implements RoomMemberHandler._remote_reject_invite + """ + return remote_reject_invite( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + remote_room_hosts=remote_room_hosts, + room_id=room_id, + user_id=target.to_string(), + ) + + def _user_joined_room(self, target, room_id): + """Implements RoomMemberHandler._user_joined_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="join", + ) + + def _user_left_room(self, target, room_id): + """Implements RoomMemberHandler._user_left_room + """ + return notify_user_membership_change( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + user_id=target.to_string(), + room_id=room_id, + change="left", + ) + + def get_or_register_3pid_guest(self, requester, medium, address, inviter_user_id): + """Implements RoomMemberHandler.get_or_register_3pid_guest + """ + return get_or_register_3pid_guest( + self.simple_http_client, + host=self.config.worker_replication_host, + port=self.config.worker_replication_http_port, + requester=requester, + medium=medium, + address=address, + inviter_user_id=inviter_user_id, + ) diff --git a/synapse/server.py b/synapse/server.py index c48c96727e..cd0c1a51be 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -47,9 +47,8 @@ from synapse.handlers.device import DeviceHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.presence import PresenceHandler from synapse.handlers.room_list import RoomListHandler -from synapse.handlers.room_member import ( - RoomMemberMasterHandler, RoomMemberWorkerHandler, -) +from synapse.handlers.room_member import RoomMemberMasterHandler +from synapse.handlers.room_member_worker import RoomMemberWorkerHandler from synapse.handlers.set_password import SetPasswordHandler from synapse.handlers.sync import SyncHandler from synapse.handlers.typing import TypingHandler From 62ad701326089ceebbc2683a2e0c9eeeaa734293 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 14:15:49 +0000 Subject: [PATCH 318/348] s/join/joined/ in notify_user_membership_change --- synapse/handlers/room_member_worker.py | 2 +- synapse/replication/http/membership.py | 6 
+++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index 75a881d990..493aec1e48 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -73,7 +73,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): port=self.config.worker_replication_http_port, user_id=target.to_string(), room_id=room_id, - change="join", + change="joined", ) def _user_left_room(self, target, room_id): diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index df16c3b2b4..fcd038a8a2 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -154,7 +154,7 @@ def notify_user_membership_change(client, host, port, user_id, room_id, change): Returns: Deferred """ - assert change in ("join", "left") + assert change in ("joined", "left") uri = "http://%s:%s/_synapse/replication/user_%s_room" % (host, port, change) @@ -297,7 +297,7 @@ class ReplicationRegister3PIDGuestRestServlet(RestServlet): class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): - PATTERNS = [re.compile("^/_synapse/replication/user_(?P<change>join|left)_room$")] + PATTERNS = [re.compile("^/_synapse/replication/user_(?P<change>joined|left)_room$")] def __init__(self, hs): super(ReplicationUserJoinedLeftRoomRestServlet, self).__init__() @@ -317,7 +317,7 @@ class ReplicationUserJoinedLeftRoomRestServlet(RestServlet): user = UserID.from_string(user_id) - if change == "join": + if change == "joined": user_joined_room(self.distributor, user, room_id) elif change == "left": user_left_room(self.distributor, user, room_id) From 0011ede3b0ba9de22af3bb36c300b6779918bfa1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 14:19:23 +0000 Subject: [PATCH 319/348] Fix imports --- synapse/replication/http/__init__.py | 5 +---- synapse/replication/http/membership.py | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index 7a148301de..1d7a607529 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -13,11 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import send_event -import membership - from synapse.http.server import JsonResource +from synapse.replication.http import membership, send_event REPLICATION_PREFIX = "/_synapse/replication" diff --git a/synapse/replication/http/membership.py b/synapse/replication/http/membership.py index fcd038a8a2..e66c4e881f 100644 --- a/synapse/replication/http/membership.py +++ b/synapse/replication/http/membership.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License.
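The join/joined rename above works because a single servlet matches both notification endpoints and picks its behaviour from the captured path segment. A small standalone illustration of that dispatch, with plain callables in place of the distributor:

    import re

    USER_ROOM_PATTERN = re.compile(
        "^/_synapse/replication/user_(?P<change>joined|left)_room$"
    )


    def dispatch_membership_change(path, user_id, room_id, on_joined, on_left):
        match = USER_ROOM_PATTERN.match(path)
        if match is None:
            raise ValueError("unrecognised replication path: %r" % (path,))
        if match.group("change") == "joined":
            return on_joined(user_id, room_id)
        return on_left(user_id, room_id)


    def _announce_join(user_id, room_id):
        print("%s joined %s" % (user_id, room_id))


    def _announce_leave(user_id, room_id):
        print("%s left %s" % (user_id, room_id))


    if __name__ == "__main__":
        dispatch_membership_change(
            "/_synapse/replication/user_joined_room",
            "@alice:example.com", "!abc:example.com",
            on_joined=_announce_join, on_left=_announce_leave,
        )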
+import logging +import re + from twisted.internet import defer from synapse.api.errors import SynapseError, MatrixCodeMessageException @@ -20,9 +23,6 @@ from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.types import Requester, UserID from synapse.util.distributor import user_left_room, user_joined_room -import logging -import re - logger = logging.getLogger(__name__) From 4f28018c83c071d0a70a545ec953bd1399c55c7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 1 Mar 2018 15:21:11 +0000 Subject: [PATCH 320/348] Register membership/state servlets in event_creator --- synapse/app/event_creator.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index eb593c5278..172e989b54 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -31,14 +31,20 @@ from synapse.replication.slave.storage.account_data import SlavedAccountDataStor from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore from synapse.replication.slave.storage.devices import SlavedDeviceStore +from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore +from synapse.replication.slave.storage.profile import SlavedProfileStore from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.pushers import SlavedPusherStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore +from synapse.replication.slave.storage.transactions import TransactionStore from synapse.replication.tcp.client import ReplicationClientHandler -from synapse.rest.client.v1.room import RoomSendEventRestServlet +from synapse.rest.client.v1.room import ( + RoomSendEventRestServlet, RoomMembershipRestServlet, RoomStateEventRestServlet, + JoinRoomAliasServlet, +) from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -52,6 +58,9 @@ logger = logging.getLogger("synapse.app.event_creator") class EventCreatorSlavedStore( + DirectoryStore, + TransactionStore, + SlavedProfileStore, SlavedAccountDataStore, SlavedPusherStore, SlavedReceiptsStore, @@ -85,6 +94,9 @@ class EventCreatorServer(HomeServer): elif name == "client": resource = JsonResource(self, canonical_json=False) RoomSendEventRestServlet(self).register(resource) + RoomMembershipRestServlet(self).register(resource) + RoomStateEventRestServlet(self).register(resource) + JoinRoomAliasServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, "/_matrix/client/unstable": resource, From cb2c7c0669c7f4d4cc41d65a7f2328589fa04fc5 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:09:20 +0000 Subject: [PATCH 321/348] Update CHANGES.rst WIP, need to add most recent PRs --- CHANGES.rst | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index a7ed49e105..0bcd5a7177 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,97 @@ This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. 
+Changes in synapse v0.27.0-rc1 (2018-03-14) +=========================================== + +Features: + * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) + * Add shiney new purge API. New implementation is:- + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional +(PR #2858,2867,2882, 2946,2962,2943) + + * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) + * Add /room/{id}/event/{id} to synapse (PR #2766) + * Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! + * Add federation_domain_whitelist option (PR #2820,2821) + + +Changes: + + * Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are:- + * Event Creation + * Push Actions + +With more to follow + + (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) + + * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) + * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! + * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! + * Better logging when login can't find a 3pid (PR #2744) + * add ?ts massaging for ASes (PR #2754) + * Remove 'verbosity'/'log_file' from generated cfg (PR #2755) + * Make indentation of generated log config consistent (PR #2762) + * Remove dead code related to default thumbnails (PR #2764) + * Update http request metrics before calling servlet (PR #2770) + * Do bcrypt hashing in a background thread (PR #2773) + * When using synctl with workers, don't start the main synapse automatically (PR #2774) + * Make Counter render floats (PR #2778) + * Store state groups separately from events (PR #2784) + * Reorganise request and block metrics (PR #2785) + * Metrics for number of RDATA commands received (PR #2786) + * Metrics for events processed in appservice and fed sender (PR #2787) + * Optimise LoggingContext creation and copying (PR #2792) + * Track db txn time in millisecs (PR #2793) + * Track DB scheduling delay per-request (PR #2794, #2795) + * Sanity checking for user ids (PR #2797) + * better exception logging in callbackmetrics (PR #2809) + * Fix bugs in block metrics (PR #2810) + * Add some comments about the reactor tick time metric (PR #2816) + * Use a connection pool for the SimpleHttpClient (PR #2817) + * Remove unused/bitrotted MemoryDataStore (PR #2828) + * Make it possible to run tests against postgres (PR #2829) + * Factor out get_db_conn to HomeServer base class (PR #2830) + * Logging and metrics for the http pusher (PR #2833) + * Improve exception handling in persist_event (PR #2834) + * montoring metrics for number of cache evictions (PR #2844) + * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! + * Remove ability for AS users to call /events and /sync (PR #2948) + * use bcrypt.checkpw (PR #2949) Thanks to @krombel! 
+ * Factor run_in_background out from preserve_fn (PR #2961) + * Add a metric which increments when a request is received (PR #2965) + * Improve caching for read_marker API (PR #2927) + + +Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) + + + +Bug fixes: + + * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! + * Check missing fields in event_from_pdu_json (PR #2745) + * Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! + * Fix flaky test_rooms UTs (PR #2765) + * Fix publicised groups GET API (singular) over federation (PR #2772) + * Fix a logcontext leak in persist_events (PR #2790) + * Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) + * fix SQL when searching all users (PR #2803) + * Fix server 500 on public rooms call when no rooms exist (PR #2827) + * Fix SQL for user search (PR #2831) + * Fix sql error in quarantine_media (PR #2837) + * Handle url_previews with no content-type (PR #2845) + * Add missing yield during 3pid signature checks (PR #2933) + * Fix race in sync when joining room (PR #2944) + * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) + * Fix scary-looking dns resolution errors (PR #2838) + + + Changes in synapse v0.26.0 (2018-01-05) ======================================= From a492b17fe2cbbb10687d9a7cbe9eda67b0562656 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:18:09 +0000 Subject: [PATCH 322/348] Update CHANGES.rst clean formatting --- CHANGES.rst | 128 ++++++++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 68 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0bcd5a7177..4d1b4ba13c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,66 +12,58 @@ Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== Features: - * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) - * Add shiney new purge API. New implementation is:- +* Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) +* Add shiney new purge API. New implementation is:- * Faster * Supports clearing by timestamp * Supports deleting of local events - * Is transactional -(PR #2858,2867,2882, 2946,2962,2943) + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) - * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) - * Add /room/{id}/event/{id} to synapse (PR #2766) - * Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! - * Add federation_domain_whitelist option (PR #2820,2821) +* Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) +* Add /room/{id}/event/{id} to synapse (PR #2766) +* Add an admin route to get all the media in a room (PR #2818) Thanks to @turt2live! 
+* Add federation_domain_whitelist option (PR #2820,2821) Changes: - * Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are:- - * Event Creation - * Push Actions - -With more to follow - - (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) - - * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) - * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! - * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! - * Better logging when login can't find a 3pid (PR #2744) - * add ?ts massaging for ASes (PR #2754) - * Remove 'verbosity'/'log_file' from generated cfg (PR #2755) - * Make indentation of generated log config consistent (PR #2762) - * Remove dead code related to default thumbnails (PR #2764) - * Update http request metrics before calling servlet (PR #2770) - * Do bcrypt hashing in a background thread (PR #2773) - * When using synctl with workers, don't start the main synapse automatically (PR #2774) - * Make Counter render floats (PR #2778) - * Store state groups separately from events (PR #2784) - * Reorganise request and block metrics (PR #2785) - * Metrics for number of RDATA commands received (PR #2786) - * Metrics for events processed in appservice and fed sender (PR #2787) - * Optimise LoggingContext creation and copying (PR #2792) - * Track db txn time in millisecs (PR #2793) - * Track DB scheduling delay per-request (PR #2794, #2795) - * Sanity checking for user ids (PR #2797) - * better exception logging in callbackmetrics (PR #2809) - * Fix bugs in block metrics (PR #2810) - * Add some comments about the reactor tick time metric (PR #2816) - * Use a connection pool for the SimpleHttpClient (PR #2817) - * Remove unused/bitrotted MemoryDataStore (PR #2828) - * Make it possible to run tests against postgres (PR #2829) - * Factor out get_db_conn to HomeServer base class (PR #2830) - * Logging and metrics for the http pusher (PR #2833) - * Improve exception handling in persist_event (PR #2834) - * montoring metrics for number of cache evictions (PR #2844) - * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! - * Remove ability for AS users to call /events and /sync (PR #2948) - * use bcrypt.checkpw (PR #2949) Thanks to @krombel! - * Factor run_in_background out from preserve_fn (PR #2961) - * Add a metric which increments when a request is received (PR #2965) - * Improve caching for read_marker API (PR #2927) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are XXXXXX, (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) +* Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) +* Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! +* Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! 
+* Better logging when login can't find a 3pid (PR #2744) +* add ?ts massaging for ASes (PR #2754) +* Remove 'verbosity'/'log_file' from generated cfg (PR #2755) +* Make indentation of generated log config consistent (PR #2762) +* Remove dead code related to default thumbnails (PR #2764) +* Update http request metrics before calling servlet (PR #2770) +* Do bcrypt hashing in a background thread (PR #2773) +* When using synctl with workers, don't start the main synapse automatically (PR #2774) +* Make Counter render floats (PR #2778) +* Store state groups separately from events (PR #2784) +* Reorganise request and block metrics (PR #2785) +* Metrics for number of RDATA commands received (PR #2786) +* Metrics for events processed in appservice and fed sender (PR #2787) +* Optimise LoggingContext creation and copying (PR #2792) +* Track db txn time in millisecs (PR #2793) +* Track DB scheduling delay per-request (PR #2794, #2795) +* Sanity checking for user ids (PR #2797) +* better exception logging in callbackmetrics (PR #2809) +* Fix bugs in block metrics (PR #2810) +* Add some comments about the reactor tick time metric (PR #2816) +* Use a connection pool for the SimpleHttpClient (PR #2817) +* Remove unused/bitrotted MemoryDataStore (PR #2828) +* Make it possible to run tests against postgres (PR #2829) +* Factor out get_db_conn to HomeServer base class (PR #2830) +* Logging and metrics for the http pusher (PR #2833) +* Improve exception handling in persist_event (PR #2834) +* montoring metrics for number of cache evictions (PR #2844) +* Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! +* Remove ability for AS users to call /events and /sync (PR #2948) +* use bcrypt.checkpw (PR #2949) Thanks to @krombel! +* Factor run_in_background out from preserve_fn (PR #2961) +* Add a metric which increments when a request is received (PR #2965) +* Improve caching for read_marker API (PR #2927) Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) @@ -80,22 +72,22 @@ Synapse 0.27.0 begins the process of rationalising metric names. To enable a gra Bug fixes: - * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! - * Check missing fields in event_from_pdu_json (PR #2745) - * Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! - * Fix flaky test_rooms UTs (PR #2765) - * Fix publicised groups GET API (singular) over federation (PR #2772) - * Fix a logcontext leak in persist_events (PR #2790) - * Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) - * fix SQL when searching all users (PR #2803) - * Fix server 500 on public rooms call when no rooms exist (PR #2827) - * Fix SQL for user search (PR #2831) - * Fix sql error in quarantine_media (PR #2837) - * Handle url_previews with no content-type (PR #2845) - * Add missing yield during 3pid signature checks (PR #2933) - * Fix race in sync when joining room (PR #2944) - * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) - * Fix scary-looking dns resolution errors (PR #2838) +* synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! 
+* Check missing fields in event_from_pdu_json (PR #2745) +* Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! +* Fix flaky test_rooms UTs (PR #2765) +* Fix publicised groups GET API (singular) over federation (PR #2772) +* Fix a logcontext leak in persist_events (PR #2790) +* Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) +* fix SQL when searching all users (PR #2803) +* Fix server 500 on public rooms call when no rooms exist (PR #2827) +* Fix SQL for user search (PR #2831) +* Fix sql error in quarantine_media (PR #2837) +* Handle url_previews with no content-type (PR #2845) +* Add missing yield during 3pid signature checks (PR #2933) +* Fix race in sync when joining room (PR #2944) +* Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) +* Fix scary-looking dns resolution errors (PR #2838) From fb647164f2a008cdba56577ad0f41c4b4c1147d1 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 16:20:36 +0000 Subject: [PATCH 323/348] Update CHANGES.rst --- CHANGES.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4d1b4ba13c..53a97a7697 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,10 +14,10 @@ Changes in synapse v0.27.0-rc1 (2018-03-14) Features: * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) * Add shiney new purge API. New implementation is:- - * Faster - * Supports clearing by timestamp - * Supports deleting of local events - * Is transactional (PR #2858,2867,2882, 2946,2962,2943) + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. (PR #2813) * Add /room/{id}/event/{id} to synapse (PR #2766) From c33c1ceddd5da8195b38059dce31255209075ba2 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 14 Mar 2018 11:09:08 -0600 Subject: [PATCH 324/348] OCD: Make the event_creator routes regex a code block All the others are code blocks, so this one should be to (currently it is a blockquote). Signed-off-by: Travis Ralston --- docs/workers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workers.rst b/docs/workers.rst index dee04bbf3e..80f8d2181a 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -230,7 +230,7 @@ file. For example:: ``synapse.app.event_creator`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Handles non-state event creation. It can handle REST endpoints matching: +Handles non-state event creation. It can handle REST endpoints matching:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/send From 5ccb57d3ff6e3affd32cc796f625b2378483d871 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 14 Mar 2018 17:12:58 +0000 Subject: [PATCH 325/348] Update CHANGES.rst --- CHANGES.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 53a97a7697..beac2117d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,16 +1,17 @@ Unreleased ========== + + +Changes in synapse v0.27.0-rc1 (2018-03-14) +=========================================== + synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. 
This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. - -Changes in synapse v0.27.0-rc1 (2018-03-14) -=========================================== - Features: * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) * Add shiney new purge API. New implementation is:- @@ -27,7 +28,7 @@ Features: Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. New workers are XXXXXX, (PR #2892, #2893, #2894, #2896, #2897, #2898, #2899, #2900, #2901, #2902, #2903, #2904, #2913, #2920, #2921, #2922, #2923, #2924, #2925, #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more [here](https://github.com/matrix-org/synapse/blob/master/docs/workers.rst), (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! @@ -64,12 +65,9 @@ Changes: * Factor run_in_background out from preserve_fn (PR #2961) * Add a metric which increments when a request is received (PR #2965) * Improve caching for read_marker API (PR #2927) +* Add Measure block for persist_events (PR #2975) -Synapse 0.27.0 begins the process of rationalising metric names. To enable a graceful migration path, this release just adds new names for the metrics being renamed. A future release will remove the old ones. For more info see [here](https://github.com/matrix-org/synapse/blob/develop/docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0270) - - - Bug fixes: * synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! From 10fdcf561d614235ba8f6ef3f980ac16ca0d7df7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 17:30:17 +0000 Subject: [PATCH 326/348] Fix up rst formatting --- CHANGES.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index beac2117d3..42fe461b24 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,12 +13,14 @@ This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. Features: + * Add support for a remote media repository backed by S3 (PR 2867, 2777, 2783, 2789, 2791, 2804, 2812,2814, 2857, 2868, 2767) -* Add shiney new purge API. New implementation is:- - * Faster - * Supports clearing by timestamp - * Supports deleting of local events - * Is transactional (PR #2858,2867,2882, 2946,2962,2943) +* Add shiney new purge API. New implementation is: + + * Faster + * Supports clearing by timestamp + * Supports deleting of local events + * Is transactional (PR #2858,2867,2882, 2946,2962,2943) * Let homeservers specify a whitelist for the format of 3PIDs that users are allowed to register with or add to their HS accounts. 
(PR #2813) * Add /room/{id}/event/{id} to synapse (PR #2766) @@ -28,7 +30,7 @@ Features: Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more [here](https://github.com/matrix-org/synapse/blob/master/docs/workers.rst), (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) +* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more at `docs/workers.rst `_ (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) * Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) * Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! * Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! From 2059b8573f997e3210e0fa995d03f9168f8aeba6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 14 Mar 2018 18:11:21 +0000 Subject: [PATCH 327/348] Update CHANGES.rst --- CHANGES.rst | 86 +++++++++++++++-------------------------------------- 1 file changed, 24 insertions(+), 62 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 42fe461b24..38cd3bec65 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,80 +14,42 @@ reported to prometheus. See `docs/metrics-howto.rst `_ for full details (PR #2858, #2867, #2882, #2946, #2962, #2943) +* Add support for whitelisting 3PIDs that users can register. (PR #2813) +* Add ``/room/{id}/event/{id}`` API (PR #2766) +* Add an admin API to get all the media in a room (PR #2818) Thanks to @turt2live! +* Add ``federation_domain_whitelist`` option (PR #2820, #2821) Changes: -* Major refactor to move computation out of the main process and into a series of worker processes with a view to taking advantage of multicore machines. See more at `docs/workers.rst `_ (PR #2892-#2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995) -* Use StateResolutionHandler to resolve state in persist_events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) -* Adapt the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! -* Allow use of higher versions of saml2 (PR #2695) Thanks to @okurz! -* Better logging when login can't find a 3pid (PR #2744) -* add ?ts massaging for ASes (PR #2754) -* Remove 'verbosity'/'log_file' from generated cfg (PR #2755) -* Make indentation of generated log config consistent (PR #2762) -* Remove dead code related to default thumbnails (PR #2764) -* Update http request metrics before calling servlet (PR #2770) -* Do bcrypt hashing in a background thread (PR #2773) +* Continue to factor out processing from main process and into worker processes. See updated `docs/workers.rst `_ (PR #2892 - #2904, #2913, #2920 - #2926, #2947, #2847, #2854, #2872, #2873, #2874, #2928, #2929, #2934, #2856, #2976 - #2984, #2987 - #2989, #2991 - #2993, #2995, #2784) +* Ensure state cache is used when persisting events (PR #2864, #2871, #2802, #2835, #2836, #2841, #2842, #2849) +* Change the default config to bind on both IPv4 and IPv6 on all platforms (PR #2435) Thanks to @silkeh! 
+* No longer require a specific version of saml2 (PR #2695) Thanks to @okurz! +* Remove ``verbosity``/``log_file`` from generated config (PR #2755) +* Add and improve metrics and logging (PR #2770, #2778, #2785, #2786, #2787, #2793, #2794, #2795, #2809, #2810, #2833, #2834, #2844, #2965, #2927, #2975, #2790, #2796, #2838) * When using synctl with workers, don't start the main synapse automatically (PR #2774) -* Make Counter render floats (PR #2778) -* Store state groups separately from events (PR #2784) -* Reorganise request and block metrics (PR #2785) -* Metrics for number of RDATA commands received (PR #2786) -* Metrics for events processed in appservice and fed sender (PR #2787) -* Optimise LoggingContext creation and copying (PR #2792) -* Track db txn time in millisecs (PR #2793) -* Track DB scheduling delay per-request (PR #2794, #2795) -* Sanity checking for user ids (PR #2797) -* better exception logging in callbackmetrics (PR #2809) -* Fix bugs in block metrics (PR #2810) -* Add some comments about the reactor tick time metric (PR #2816) -* Use a connection pool for the SimpleHttpClient (PR #2817) -* Remove unused/bitrotted MemoryDataStore (PR #2828) -* Make it possible to run tests against postgres (PR #2829) -* Factor out get_db_conn to HomeServer base class (PR #2830) -* Logging and metrics for the http pusher (PR #2833) -* Improve exception handling in persist_event (PR #2834) -* montoring metrics for number of cache evictions (PR #2844) +* Minor performance improvements (PR #2773, #2792) +* Use a connection pool for non-federation outbound connections (PR #2817) +* Make it possible to run unit tests against postgres (PR #2829) * Update pynacl dependency to 1.2.1 or higher (PR #2888) Thanks to @bachp! * Remove ability for AS users to call /events and /sync (PR #2948) -* use bcrypt.checkpw (PR #2949) Thanks to @krombel! -* Factor run_in_background out from preserve_fn (PR #2961) -* Add a metric which increments when a request is received (PR #2965) -* Improve caching for read_marker API (PR #2927) -* Add Measure block for persist_events (PR #2975) - +* Use bcrypt.checkpw (PR #2949) Thanks to @krombel! Bug fixes: -* synapse/config/password_auth_providers: Fixed bracket typo (PR #2683) Thanks to @seckrv! -* Check missing fields in event_from_pdu_json (PR #2745) -* Fix templating error with unban permission message (PR #2761) Thanks to @turt2live! -* Fix flaky test_rooms UTs (PR #2765) +* Fix broken ``ldap_config`` config option (PR #2683) Thanks to @seckrv! +* Fix error message when user is not allowed to unban (PR #2761) Thanks to @turt2live! 
* Fix publicised groups GET API (singular) over federation (PR #2772) -* Fix a logcontext leak in persist_events (PR #2790) -* Fix 'NoneType' object has no attribute 'writeHeaders' (PR #2796) -* fix SQL when searching all users (PR #2803) -* Fix server 500 on public rooms call when no rooms exist (PR #2827) -* Fix SQL for user search (PR #2831) -* Fix sql error in quarantine_media (PR #2837) -* Handle url_previews with no content-type (PR #2845) -* Add missing yield during 3pid signature checks (PR #2933) -* Fix race in sync when joining room (PR #2944) +* Fix user directory when using ``user_directory_search_all_users`` config option (PR #2803, #2831) +* Fix error on ``/publicRooms`` when no rooms exist (PR #2827) +* Fix bug in quarantine_media (PR #2837) +* Fix url_previews when no Content-Type is returned from URL (PR #2845) +* Fix rare race in sync API when joining room (PR #2944) * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) -* Fix scary-looking dns resolution errors (PR #2838) From 7d26591048ac5ab6263ddb24a840c58edfd85f50 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:33:24 +0000 Subject: [PATCH 328/348] Update CHANGES.rst --- CHANGES.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 38cd3bec65..a50d3ea5ba 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,12 +6,13 @@ Unreleased Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== -synctl no longer starts the main synapse when using ``-a`` option with workers. -A new worker file should be added with ``worker_app: synapse.app.homeserver``. +The common case for running Synapse is not to run separate workers, but for those that do, synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. +Note, old metrics will be removed from the 0.28.0 release. + Features: * Add ability for ASes to override message send time (PR #2754) From 7367a4a8239a04047fa648694c08f3d1c63dd3ee Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:33:52 +0000 Subject: [PATCH 329/348] Update CHANGES.rst --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index a50d3ea5ba..48791cec33 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,7 +6,7 @@ Unreleased Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== -The common case for running Synapse is not to run separate workers, but for those that do, synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. +The common case for running Synapse is not to run separate workers, but for those that do, be aware that synctl no longer starts the main synapse when using ``-a`` option with workers. A new worker file should be added with ``worker_app: synapse.app.homeserver``. This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. 
From b29d1abab6ca42f693e3b83ab31440fee95d00f2 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Thu, 15 Mar 2018 10:34:15 +0000 Subject: [PATCH 330/348] Update CHANGES.rst --- CHANGES.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 48791cec33..931ecb4d31 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,3 @@ -Unreleased -========== - - - Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== From 068c21ab102b22aeac7fc9aa1f4d2d229a807872 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 10:36:31 +0000 Subject: [PATCH 331/348] CHANGES.rst: reword metric deprecation --- CHANGES.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 931ecb4d31..e25327f28f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,8 +5,7 @@ The common case for running Synapse is not to run separate workers, but for thos This release also begins the process of renaming a number of the metrics reported to prometheus. See `docs/metrics-howto.rst `_. - -Note, old metrics will be removed from the 0.28.0 release. +Note that the v0.28.0 release will remove the deprecated metric names. Features: From 0ad5125814dc18a79423740ac54f96e16a427758 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 11:05:42 +0000 Subject: [PATCH 332/348] Update purge_history_api.rst clarify that `purge_history` will not purge state --- docs/admin_api/purge_history_api.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/admin_api/purge_history_api.rst b/docs/admin_api/purge_history_api.rst index ea2922da5c..2da833c827 100644 --- a/docs/admin_api/purge_history_api.rst +++ b/docs/admin_api/purge_history_api.rst @@ -16,9 +16,11 @@ including an ``access_token`` of a server admin. By default, events sent by local users are not deleted, as they may represent the only copies of this content in existence. (Events sent by remote users are -deleted, and room state data before the cutoff is always removed). +deleted.) -To delete local events as well, set ``delete_local_events`` in the body: +Room state data (such as joins, leaves, topic) is always preserved. + +To delete local message events as well, set ``delete_local_events`` in the body: .. code:: json From 5ea624b0f5f6ad11b1640729b96272f8e7c94d46 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 15 Mar 2018 11:48:35 +0000 Subject: [PATCH 333/348] CONTRIBUTING.rst: fix CI info --- CONTRIBUTING.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2a88647ca3..c6ee16efc7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -30,8 +30,12 @@ use github's pull request workflow to review the contribution, and either ask you to make any refinements needed or merge it and make them ourselves. The changes will then land on master when we next do a release. -We use Jenkins for continuous integration (http://matrix.org/jenkins), and -typically all pull requests get automatically tested Jenkins: if your change breaks the build, Jenkins will yell about it in #matrix-dev:matrix.org so please lurk there and keep an eye open. +We use `Jenkins `_ and +`Travis `_ for continuous +integration. All pull requests to synapse get automatically tested by Travis; +the Jenkins builds require an adminstrator to start them. If your change +breaks the build, this will be shown in github, so please keep an eye on the +pull request for feedback. 
Code style ~~~~~~~~~~ @@ -115,4 +119,4 @@ can't be accepted. Git makes this trivial - just use the -s flag when you do Conclusion ~~~~~~~~~~ -That's it! Matrix is a very open and collaborative project as you might expect given our obsession with open communication. If we're going to successfully matrix together all the fragmented communication technologies out there we are reliant on contributions and collaboration from the community to do so. So please get involved - and we hope you have as much fun hacking on Matrix as we do! \ No newline at end of file +That's it! Matrix is a very open and collaborative project as you might expect given our obsession with open communication. If we're going to successfully matrix together all the fragmented communication technologies out there we are reliant on contributions and collaboration from the community to do so. So please get involved - and we hope you have as much fun hacking on Matrix as we do! From ddb00efc1ddec646d02e8def6053003f04d077d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 14:41:30 +0000 Subject: [PATCH 334/348] Bump version number --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index ef8853bd24..d72d88b47d 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.26.0" +__version__ = "0.27.0-rc1" From 7c7706f42b56dd61f5eb17679aa12247f7058ed5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 15:40:13 +0000 Subject: [PATCH 335/348] Fix bug where state cache used lots of memory The state cache bases its size on the sum of the size of entries. The size of the entry is calculated once on insertion, so it is important that the size of entries does not change. The DictionaryCache modified the entries size, which caused the state cache to incorrectly think it was smaller than it actually was. 
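As a minimal sketch of the invariant involved (hypothetical names, not Synapse code): a cache whose total size is the sum of per-entry sizes measured at insertion time only stays accurate if entries are never mutated while they sit in the cache. Popping the entry, mutating it, and re-inserting it, the pattern the diff below adopts, lets the cache re-measure the value so the size bookkeeping stays correct.

.. code:: python

    class SizeTrackedCache(object):
        """Toy cache that tracks the summed size of its values."""

        def __init__(self, size_of):
            self._data = {}
            self._size_of = size_of  # measured on insert/remove, like LruCache's size_callback
            self.total_size = 0      # the figure that eviction decisions rely on

        def set(self, key, value):
            self.pop(key)
            self._data[key] = value
            self.total_size += self._size_of(value)

        def get(self, key):
            return self._data.get(key)

        def pop(self, key, default=None):
            value = self._data.pop(key, None)
            if value is None:
                return default
            self.total_size -= self._size_of(value)
            return value

    # Mutating a cached value in place (what DictionaryCache effectively did)
    # leaves total_size stale, so the cache under-reports its memory use.
    bad = SizeTrackedCache(size_of=len)
    bad.set("group", {"a": 1})
    bad.get("group").update({"b": 2, "c": 3})
    assert bad.total_size == 1   # stale: the dict now holds 3 keys

    # Pop, mutate, re-insert keeps the accounting accurate.
    good = SizeTrackedCache(size_of=len)
    good.set("group", {"a": 1})
    entry = good.pop("group", {})
    entry.update({"b": 2, "c": 3})
    good.set("group", entry)
    assert good.total_size == 3

The diff that follows applies the same pop-then-reinsert pattern in DictionaryCache, and changes LruCache so that it always refreshes its size accounting on set rather than only when the new value compares unequal to the old one.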
--- synapse/util/caches/dictionary_cache.py | 6 +++++- synapse/util/caches/lrucache.py | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index d4105822b3..1709e8b429 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -132,9 +132,13 @@ class DictionaryCache(object): self._update_or_insert(key, value, known_absent) def _update_or_insert(self, key, value, known_absent): - entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {})) + # We pop and reinsert as we need to tell the cache the size may have + # changed + + entry = self.cache.pop(key, DictionaryEntry(False, set(), {})) entry.value.update(value) entry.known_absent.update(known_absent) + self.cache[key] = entry def _insert(self, key, value, known_absent): self.cache[key] = DictionaryEntry(True, known_absent, value) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index f088dd430e..a4bf8fa6ae 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -154,14 +154,14 @@ class LruCache(object): def cache_set(key, value, callbacks=[]): node = cache.get(key, None) if node is not None: - if value != node.value: + if node.callbacks and value != node.value: for cb in node.callbacks: cb() node.callbacks.clear() - if size_callback: - cached_cache_len[0] -= size_callback(node.value) - cached_cache_len[0] += size_callback(value) + if size_callback: + cached_cache_len[0] -= size_callback(node.value) + cached_cache_len[0] += size_callback(value) node.callbacks.update(callbacks) From a8ce159be43560e9aea8f3be65110eea49d1f50e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: [PATCH 336/348] Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c8..3c7a0ef97a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. 
- json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e4..eb91c0b293 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3890878170..9fc65229fd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad4..2e23dd78ba 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 From 38f952b9bc96cf72d96bab3510e8f428a8247542 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 09:27:36 +0000 Subject: [PATCH 337/348] spell out not to massively increase bcrypt rounds --- synapse/config/registration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 336959094b..c5384b3ad4 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -77,7 +77,9 @@ class RegistrationConfig(Config): # Set the number of bcrypt rounds used to generate password hash. # Larger numbers increase the work factor needed to generate the hash. - # The default number of rounds is 12. + # The default number is 12 (which equates to 2^12 rounds). + # N.B. that increasing this will exponentially increase the time required + # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and From 9a0d783c113ae74c55e409d33219cd77f3662b9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Mar 2018 11:35:53 +0000 Subject: [PATCH 338/348] Add comments --- synapse/util/caches/lrucache.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a4bf8fa6ae..1c5a982094 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -154,11 +154,18 @@ class LruCache(object): def cache_set(key, value, callbacks=[]): node = cache.get(key, None) if node is not None: + # We sometimes store large objects, e.g. dicts, which cause + # the inequality check to take a long time. So let's only do + # the check if we have some callbacks to call. if node.callbacks and value != node.value: for cb in node.callbacks: cb() node.callbacks.clear() + # We don't bother to protect this by value != node.value as + # generally size_callback will be cheap compared with equality + # checks. (For example, taking the size of two dicts is quicker + # than comparing them for equality.) 
if size_callback: cached_cache_len[0] -= size_callback(node.value) cached_cache_len[0] += size_callback(value) From 1a3aa957ca1164268c9bbbd564491c9d46fbb7d2 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 19 Mar 2018 15:11:00 +0000 Subject: [PATCH 339/348] Update CHANGES.rst --- CHANGES.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index e25327f28f..55358287a0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,10 @@ +Changes in synapse v0.27.0-rc2 (2018-03-19) +=========================================== +Bugs: + +* Fix bug introduced in v0.27.0-rc1 that causes much increased memory usage in state cache (PR #3005) + + Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== From c384705ee877ba40e1a736b68ebe07434662cbaa Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 19 Mar 2018 15:11:58 +0000 Subject: [PATCH 340/348] Update __init__.py bump version --- synapse/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index d72d88b47d..71b409ba8b 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0-rc1" +__version__ = "0.27.0-rc2" From 610accbb7f58d7a07007dedaa054c8ad7a9f5851 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 19 Mar 2018 16:06:02 +0000 Subject: [PATCH 341/348] Fix replication after switch to simplejson Turns out that simplejson serialises namedtuple's as dictionaries rather than tuples by default. --- synapse/replication/tcp/commands.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 9633404f73..1b4b657210 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -107,7 +107,7 @@ class RdataCommand(Command): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - json.dumps(self.row), + json.dumps(self.row, namedtuple_as_object=False), )) @@ -301,7 +301,9 @@ class InvalidateCacheCommand(Command): return cls(cache_func, json.loads(keys_json)) def to_line(self): - return " ".join((self.cache_func, json.dumps(self.keys))) + return " ".join(( + self.cache_func, json.dumps(self.keys, namedtuple_as_object=False) + )) class UserIpCommand(Command): From 9aa5a0af514b3f2719dd01aa221039c5ba1c0c80 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 20 Mar 2018 09:58:13 +0000 Subject: [PATCH 342/348] Explicitly use simplejson --- synapse/replication/tcp/commands.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 1b4b657210..0005ad5879 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -19,7 +19,7 @@ allowed to be sent by which side. 
""" import logging -import simplejson as json +import simplejson logger = logging.getLogger(__name__) @@ -100,14 +100,14 @@ class RdataCommand(Command): return cls( stream_name, None if token == "batch" else int(token), - json.loads(row_json) + simplejson.loads(row_json) ) def to_line(self): return " ".join(( self.stream_name, str(self.token) if self.token is not None else "batch", - json.dumps(self.row, namedtuple_as_object=False), + simplejson.dumps(self.row, namedtuple_as_object=False), )) @@ -298,11 +298,11 @@ class InvalidateCacheCommand(Command): def from_line(cls, line): cache_func, keys_json = line.split(" ", 1) - return cls(cache_func, json.loads(keys_json)) + return cls(cache_func, simplejson.loads(keys_json)) def to_line(self): return " ".join(( - self.cache_func, json.dumps(self.keys, namedtuple_as_object=False) + self.cache_func, simplejson.dumps(self.keys, namedtuple_as_object=False) )) @@ -327,14 +327,14 @@ class UserIpCommand(Command): def from_line(cls, line): user_id, jsn = line.split(" ", 1) - access_token, ip, user_agent, device_id, last_seen = json.loads(jsn) + access_token, ip, user_agent, device_id, last_seen = simplejson.loads(jsn) return cls( user_id, access_token, ip, user_agent, device_id, last_seen ) def to_line(self): - return self.user_id + " " + json.dumps(( + return self.user_id + " " + simplejson.dumps(( self.access_token, self.ip, self.user_agent, self.device_id, self.last_seen, )) From f5aa027c2f9d836df7b98da6c1c5fee6b22d0a3d Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 20 Mar 2018 15:06:22 +0000 Subject: [PATCH 343/348] Update CHANGES.rst rearrange ordering of releases to match chronology --- CHANGES.rst | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 788150bcfc..bac78d3c1c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,10 +1,21 @@ Changes in synapse v0.27.0-rc2 (2018-03-19) =========================================== -Bugs: + +Pulls in v0.26.1 + +Bug fixes: * Fix bug introduced in v0.27.0-rc1 that causes much increased memory usage in state cache (PR #3005) + + +Changes in synapse v0.26.1 (2018-03-15) +======================================= + +Bug fixes: + * Fix bug where an invalid event caused server to stop functioning correctly, - due to parsing and serializing bugs in ujson library. (pulled in from 0.26.1) + due to parsing and serializing bugs in ujson library (PR #3008) + Changes in synapse v0.27.0-rc1 (2018-03-14) =========================================== @@ -55,16 +66,6 @@ Bug fixes: * Fix slow event search, switch back from GIST to GIN indexes (PR #2769, #2848) -Changes in synapse v0.26.1 (2018-03-15) -======================================= - -Bug fixes: - -* Fix bug where an invalid event caused server to stop functioning correctly, - due to parsing and serializing bugs in ujson library. 
- - - Changes in synapse v0.26.0 (2018-01-05) ======================================= From 51406dab96cb3a4de2fad02a31984cfcb613f809 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Mon, 26 Mar 2018 14:48:19 +0100 Subject: [PATCH 344/348] version bump --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index bac78d3c1c..92c19b13e9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.27.0 (2018-03-26) +======================================= + +No changes since v0.27.0-rc2 + + Changes in synapse v0.27.0-rc2 (2018-03-19) =========================================== diff --git a/synapse/__init__.py b/synapse/__init__.py index 71b409ba8b..70a0297052 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0-rc2" +__version__ = "0.27.0" From 91878620027d17b478ca678482f501d3cfb2edaf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:19:38 +0100 Subject: [PATCH 345/348] Bump version and changelog --- CHANGES.rst | 6 ++++++ synapse/__init__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 92c19b13e9..e5e8e31a1a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +Changes in synapse v0.27.1 (2018-03-19) +======================================= + +Meta release as v0.27.0 temporarily pointed to the wrong commit + + Changes in synapse v0.27.0 (2018-03-26) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index 70a0297052..2cb6ef4652 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.27.0" +__version__ = "0.27.1" From 01f72e2fc7379db7d02d5bc1eae96167aa320389 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:21:26 +0100 Subject: [PATCH 346/348] Fix date --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index e5e8e31a1a..ddef8c205b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,4 +1,4 @@ -Changes in synapse v0.27.1 (2018-03-19) +Changes in synapse v0.27.1 (2018-03-26) ======================================= Meta release as v0.27.0 temporarily pointed to the wrong commit From a9d7d98d3f87cc20d94c9cc4ee9285e6a2dddb2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 26 Mar 2018 16:36:53 +0100 Subject: [PATCH 347/348] Bum version and changelog --- CHANGES.rst | 8 ++++++++ synapse/__init__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index ddef8c205b..1372de4246 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +Changes in synapse v0.27.2 (2018-03-26) +======================================= + +Bug fixes: + +* Fix bug which broke TCP replication between workers (PR #3015) + + Changes in synapse v0.27.1 (2018-03-26) ======================================= diff --git a/synapse/__init__.py b/synapse/__init__.py index 2cb6ef4652..a9d5198aba 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -16,4 +16,4 @@ """ This is a reference implementation of a Matrix home server. 
""" -__version__ = "0.27.1" +__version__ = "0.27.2" From 16aeb415478f32185679ba5ff6c89d2cd3242861 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Wed, 28 Mar 2018 16:47:56 +0100 Subject: [PATCH 348/348] Update README.rst update docker hub url --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ef27b92ddd..8812cc1b4f 100644 --- a/README.rst +++ b/README.rst @@ -157,8 +157,8 @@ if you prefer. In case of problems, please see the _`Troubleshooting` section below. -Alternatively, Silvio Fricke has contributed a Dockerfile to automate the -above in Docker at https://registry.hub.docker.com/u/silviof/docker-matrix/. +Alternatively, Andreas Peters (previously Silvio Fricke) has contributed a Dockerfile to automate the +above in Docker at https://hub.docker.com/r/avhost/docker-matrix/tags/ Also, Martin Giess has created an auto-deployment process with vagrant/ansible, tested with VirtualBox/AWS/DigitalOcean - see https://github.com/EMnify/matrix-synapse-auto-deploy