diff --git a/.gitignore b/.gitignore index 7940158c5b..9f42a7568f 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ media_store/ build/ venv/ +venv*/ localhost-800*/ static/client/register/register_config.js diff --git a/.travis.yml b/.travis.yml index e6ba6f4752..a98d547978 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,12 @@ language: python # tell travis to cache ~/.cache/pip cache: pip +before_script: + - git remote set-branches --add origin develop + - git fetch origin develop + matrix: + fast_finish: true include: - python: 2.7 env: TOX_ENV=packaging @@ -14,10 +19,13 @@ matrix: - python: 2.7 env: TOX_ENV=py27 - + - python: 3.6 env: TOX_ENV=py36 + - python: 3.6 + env: TOX_ENV=check-newsfragment + install: - pip install tox diff --git a/CHANGES.rst b/CHANGES.rst index 70fc5af4c1..ba6929c435 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,72 @@ +Synapse 0.32.2 (2018-07-07) +=========================== + +Bugfixes +-------- + +- Amend the Python dependencies to depend on attrs from PyPI, not attr (`#3492 <https://github.com/matrix-org/synapse/issues/3492>`_) + + +Synapse 0.32.1 (2018-07-06) +=========================== + +Bugfixes +-------- + +- Add explicit dependency on netaddr (`#3488 <https://github.com/matrix-org/synapse/issues/3488>`_) + + +Changes in synapse v0.32.0 (2018-07-06) +=========================================== +No changes since 0.32.0rc1 + +Synapse 0.32.0rc1 (2018-07-05) +============================== + +Features +-------- + +- Add blacklist & whitelist of servers allowed to send events to a room via ``m.room.server_acl`` event. +- Cache factor override system for specific caches (`#3334 <https://github.com/matrix-org/synapse/issues/3334>`_) +- Add metrics to track appservice transactions (`#3344 <https://github.com/matrix-org/synapse/issues/3344>`_) +- Try to log more helpful info when a sig verification fails (`#3372 <https://github.com/matrix-org/synapse/issues/3372>`_) +- Synapse now uses the best performing JSON encoder/decoder according to your runtime (simplejson on CPython, stdlib json on PyPy). (`#3462 <https://github.com/matrix-org/synapse/issues/3462>`_) +- Add optional ip_range_whitelist param to AS registration files to lock AS IP access (`#3465 <https://github.com/matrix-org/synapse/issues/3465>`_) +- Reject invalid server names in federation requests (`#3480 <https://github.com/matrix-org/synapse/issues/3480>`_) +- Reject invalid server names in homeserver.yaml (`#3483 <https://github.com/matrix-org/synapse/issues/3483>`_) + + +Bugfixes +-------- + +- Strip access_token from outgoing requests (`#3327 <https://github.com/matrix-org/synapse/issues/3327>`_) +- Redact AS tokens in logs (`#3349 <https://github.com/matrix-org/synapse/issues/3349>`_) +- Fix federation backfill from SQLite servers (`#3355 <https://github.com/matrix-org/synapse/issues/3355>`_) +- Fix event-purge-by-ts admin API (`#3363 <https://github.com/matrix-org/synapse/issues/3363>`_) +- Fix event filtering in get_missing_events handler (`#3371 <https://github.com/matrix-org/synapse/issues/3371>`_) +- Synapse is now stricter regarding accepting events which it cannot retrieve the prev_events for. (`#3456 <https://github.com/matrix-org/synapse/issues/3456>`_) +- Fix bug where synapse would explode when receiving unicode in HTTP User-Agent header (`#3470 <https://github.com/matrix-org/synapse/issues/3470>`_) +- Invalidate cache on correct thread to avoid race (`#3473 <https://github.com/matrix-org/synapse/issues/3473>`_) + + +Improved Documentation +---------------------- + +- ``doc/postgres.rst``: fix display of the last command block. Thanks to @ArchangeGabriel! (`#3340 <https://github.com/matrix-org/synapse/issues/3340>`_) + + +Deprecations and Removals +------------------------- + +- Remove was_forgotten_at (`#3324 <https://github.com/matrix-org/synapse/issues/3324>`_) + + +Misc +---- + +- `#3332 <https://github.com/matrix-org/synapse/issues/3332>`_, `#3341 <https://github.com/matrix-org/synapse/issues/3341>`_, `#3347 <https://github.com/matrix-org/synapse/issues/3347>`_, `#3348 <https://github.com/matrix-org/synapse/issues/3348>`_, `#3356 <https://github.com/matrix-org/synapse/issues/3356>`_, `#3385 <https://github.com/matrix-org/synapse/issues/3385>`_, `#3446 <https://github.com/matrix-org/synapse/issues/3446>`_, `#3447 <https://github.com/matrix-org/synapse/issues/3447>`_, `#3467 <https://github.com/matrix-org/synapse/issues/3467>`_, `#3474 <https://github.com/matrix-org/synapse/issues/3474>`_ + + Changes in synapse v0.31.2 (2018-06-14) ======================================= diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c6ee16efc7..6c295cfbfe 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -48,6 +48,26 @@ Please ensure your changes match the cosmetic style of the existing project, and **never** mix cosmetic and functional changes in the same commit, as it makes it horribly hard to review otherwise. +Changelog +~~~~~~~~~ + +All changes, even minor ones, need a corresponding changelog +entry. These are managed by Towncrier +(https://github.com/hawkowl/towncrier). + +To create a changelog entry, make a new file in the ``changelog.d`` +file named in the format of ``issuenumberOrPR.type``. The type can be +one of ``feature``, ``bugfix``, ``removal`` (also used for +deprecations), or ``misc`` (for internal-only changes). The content of +the file is your changelog entry, which can contain RestructuredText +formatting. A note of contributors is welcomed in changelogs for +non-misc changes (the content of misc changes is not displayed). + +For example, a fix for a bug reported in #1234 would have its +changelog entry in ``changelog.d/1234.bugfix``, and contain content +like "The security levels of Florbs are now validated when +recieved over federation. Contributed by Jane Matrix". + Attribution ~~~~~~~~~~~ @@ -110,11 +130,15 @@ If you agree to this for your contribution, then all that's needed is to include the line in your commit or pull request comment:: Signed-off-by: Your Name <your@email.example.org> - -...using your real name; unfortunately pseudonyms and anonymous contributions -can't be accepted. Git makes this trivial - just use the -s flag when you do -``git commit``, having first set ``user.name`` and ``user.email`` git configs -(which you should have done anyway :) + +We accept contributions under a legally identifiable name, such as +your name on government documentation or common-law names (names +claimed by legitimate usage or repute). Unfortunately, we cannot +accept anonymous contributions at this time. + +Git allows you to add this signoff automatically when using the ``-s`` +flag to ``git commit``, which uses the name and email set in your +``user.name`` and ``user.email`` git configs. Conclusion ~~~~~~~~~~ diff --git a/MANIFEST.in b/MANIFEST.in index e2a6623a63..97f57f443f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -29,5 +29,8 @@ exclude Dockerfile exclude .dockerignore recursive-exclude jenkins *.sh +include pyproject.toml +recursive-include changelog.d * + prune .github prune demo/etc diff --git a/changelog.d/.gitignore b/changelog.d/.gitignore new file mode 100644 index 0000000000..f935021a8f --- /dev/null +++ b/changelog.d/.gitignore @@ -0,0 +1 @@ +!.gitignore diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst index 1c9c5a6bde..d17121a188 100644 --- a/docs/admin_api/user_admin_api.rst +++ b/docs/admin_api/user_admin_api.rst @@ -44,13 +44,26 @@ Deactivate Account This API deactivates an account. It removes active access tokens, resets the password, and deletes third-party IDs (to prevent the user requesting a -password reset). +password reset). It can also mark the user as GDPR-erased (stopping their data +from distributed further, and deleting it entirely if there are no other +references to it). The api is:: POST /_matrix/client/r0/admin/deactivate/<user_id> -including an ``access_token`` of a server admin, and an empty request body. +with a body of: + +.. code:: json + + { + "erase": true + } + +including an ``access_token`` of a server admin. + +The erase parameter is optional and defaults to 'false'. +An empty body may be passed for backwards compatibility. Reset password diff --git a/docs/postgres.rst b/docs/postgres.rst index 296293e859..2377542296 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -9,19 +9,19 @@ Set up database Assuming your PostgreSQL database user is called ``postgres``, create a user ``synapse_user`` with:: - su - postgres - createuser --pwprompt synapse_user + su - postgres + createuser --pwprompt synapse_user The PostgreSQL database used *must* have the correct encoding set, otherwise it would not be able to store UTF8 strings. To create a database with the correct encoding use, e.g.:: - CREATE DATABASE synapse - ENCODING 'UTF8' - LC_COLLATE='C' - LC_CTYPE='C' - template=template0 - OWNER synapse_user; + CREATE DATABASE synapse + ENCODING 'UTF8' + LC_COLLATE='C' + LC_CTYPE='C' + template=template0 + OWNER synapse_user; This would create an appropriate database named ``synapse`` owned by the ``synapse_user`` user (which must already exist). @@ -126,7 +126,7 @@ run:: --postgres-config homeserver-postgres.yaml Once that has completed, change the synapse config to point at the PostgreSQL -database configuration file ``homeserver-postgres.yaml``: +database configuration file ``homeserver-postgres.yaml``:: ./synctl stop mv homeserver.yaml homeserver-old-sqlite.yaml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..d1603b5d8b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,5 @@ +[tool.towncrier] + package = "synapse" + filename = "CHANGES.rst" + directory = "changelog.d" + issue_format = "`#{issue} <https://github.com/matrix-org/synapse/issues/{issue}>`_" diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py index 3b28417376..d2acc7654d 100755 --- a/scripts-dev/federation_client.py +++ b/scripts-dev/federation_client.py @@ -18,14 +18,22 @@ from __future__ import print_function import argparse +from urlparse import urlparse, urlunparse + import nacl.signing import json import base64 import requests import sys + +from requests.adapters import HTTPAdapter import srvlookup import yaml +# uncomment the following to enable debug logging of http requests +#from httplib import HTTPConnection +#HTTPConnection.debuglevel = 1 + def encode_base64(input_bytes): """Encode bytes as a base64 string without any padding.""" @@ -113,17 +121,6 @@ def read_signing_keys(stream): return keys -def lookup(destination, path): - if ":" in destination: - return "https://%s%s" % (destination, path) - else: - try: - srv = srvlookup.lookup("matrix", "tcp", destination)[0] - return "https://%s:%d%s" % (srv.host, srv.port, path) - except: - return "https://%s:%d%s" % (destination, 8448, path) - - def request_json(method, origin_name, origin_key, destination, path, content): if method is None: if content is None: @@ -152,13 +149,19 @@ def request_json(method, origin_name, origin_key, destination, path, content): authorization_headers.append(bytes(header)) print ("Authorization: %s" % header, file=sys.stderr) - dest = lookup(destination, path) + dest = "matrix://%s%s" % (destination, path) print ("Requesting %s" % dest, file=sys.stderr) - result = requests.request( + s = requests.Session() + s.mount("matrix://", MatrixConnectionAdapter()) + + result = s.request( method=method, url=dest, - headers={"Authorization": authorization_headers[0]}, + headers={ + "Host": destination, + "Authorization": authorization_headers[0] + }, verify=False, data=content, ) @@ -242,5 +245,39 @@ def read_args_from_config(args): args.signing_key_path = config['signing_key_path'] +class MatrixConnectionAdapter(HTTPAdapter): + @staticmethod + def lookup(s): + if s[-1] == ']': + # ipv6 literal (with no port) + return s, 8448 + + if ":" in s: + out = s.rsplit(":",1) + try: + port = int(out[1]) + except ValueError: + raise ValueError("Invalid host:port '%s'" % s) + return out[0], port + + try: + srv = srvlookup.lookup("matrix", "tcp", s)[0] + return srv.host, srv.port + except: + return s, 8448 + + def get_connection(self, url, proxies=None): + parsed = urlparse(url) + + (host, port) = self.lookup(parsed.netloc) + netloc = "%s:%d" % (host, port) + print("Connecting to %s" % (netloc,), file=sys.stderr) + url = urlunparse(( + "https", netloc, parsed.path, parsed.params, parsed.query, + parsed.fragment, + )) + return super(MatrixConnectionAdapter, self).get_connection(url, proxies) + + if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg index da8eafbb39..fa6f2d1ce4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,4 +17,5 @@ ignore = [flake8] max-line-length = 90 # W503 requires that binary operators be at the end, not start, of lines. Erik doesn't like it. -ignore = W503 +# E203 is contrary to PEP8. +ignore = W503,E203 diff --git a/synapse/__init__.py b/synapse/__init__.py index faa183a99e..3cde33c0d7 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -17,4 +17,4 @@ """ This is a reference implementation of a Matrix home server. """ -__version__ = "0.31.2" +__version__ = "0.32.2" diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 66639b0089..088b4e8b6d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -19,6 +19,7 @@ from six import itervalues import pymacaroons from twisted.internet import defer +from netaddr import IPAddress import synapse.types from synapse import event_auth @@ -244,6 +245,11 @@ class Auth(object): if app_service is None: defer.returnValue((None, None)) + if app_service.ip_range_whitelist: + ip_address = IPAddress(self.hs.get_ip_from_request(request)) + if ip_address not in app_service.ip_range_whitelist: + defer.returnValue((None, None)) + if "user_id" not in request.args: defer.returnValue((app_service.sender, app_service)) @@ -488,7 +494,7 @@ class Auth(object): def _look_up_user_by_access_token(self, token): ret = yield self.store.get_user_by_access_token(token) if not ret: - logger.warn("Unrecognised access token - not in store: %s" % (token,)) + logger.warn("Unrecognised access token - not in store.") raise AuthError( self.TOKEN_NOT_FOUND_HTTP_STATUS, "Unrecognised access token.", errcode=Codes.UNKNOWN_TOKEN @@ -511,7 +517,7 @@ class Auth(object): ) service = self.store.get_app_service_by_token(token) if not service: - logger.warn("Unrecognised appservice access token: %s" % (token,)) + logger.warn("Unrecognised appservice access token.") raise AuthError( self.TOKEN_NOT_FOUND_HTTP_STATUS, "Unrecognised access token.", diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 92b5790557..00aa622b34 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -78,6 +78,8 @@ class EventTypes(object): Topic = "m.room.topic" Name = "m.room.name" + ServerACL = "m.room.server_acl" + class RejectedReason(object): AUTH_ERROR = "auth_error" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index e6ad3768f0..227a0713b2 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,7 +17,8 @@ import logging -import simplejson as json +from canonicaljson import json + from six import iteritems from six.moves import http_client diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index dbc0e7e445..aae25e7a47 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -17,7 +17,8 @@ from synapse.storage.presence import UserPresenceState from synapse.types import UserID, RoomID from twisted.internet import defer -import simplejson as json +from canonicaljson import json + import jsonschema from jsonschema import FormatChecker diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index dd114dee07..4319ddce03 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -23,6 +23,7 @@ from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging from synapse.http.site import SynapseSite +from synapse.metrics import RegistryProxy from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.directory import DirectoryStore @@ -62,7 +63,7 @@ class AppserviceServer(HomeServer): for res in listener_config["resources"]: for name in res["names"]: if name == "metrics": - resources[METRICS_PREFIX] = MetricsResource(self) + resources[METRICS_PREFIX] = MetricsResource(RegistryProxy) root_resource = create_resource_tree(resources, NoResource()) @@ -97,7 +98,7 @@ class AppserviceServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 85dada7f9f..654ddb8414 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -122,7 +122,7 @@ class ClientReaderServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 5ca77c0f1a..441467093a 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -138,7 +138,7 @@ class EventCreatorServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 2a1995d0cd..b2415cc671 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -111,7 +111,7 @@ class FederationReaderServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 81ad574043..13d2b70053 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -125,7 +125,7 @@ class FederationSenderServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index 5a164a7a95..d2bae4ad03 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -176,7 +176,7 @@ class FrontendProxyServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 714f98a3e0..ae5fc751d5 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -266,7 +266,7 @@ class SynapseHomeServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) @@ -318,11 +318,6 @@ def setup(config_options): # check any extra requirements we have now we have a config check_requirements(config) - version_string = "Synapse/" + get_version_string(synapse) - - logger.info("Server hostname: %s", config.server_name) - logger.info("Server version: %s", version_string) - events.USE_FROZEN_DICTS = config.use_frozen_dicts tls_server_context_factory = context_factory.ServerContextFactory(config) @@ -335,7 +330,7 @@ def setup(config_options): db_config=config.database_config, tls_server_context_factory=tls_server_context_factory, config=config, - version_string=version_string, + version_string="Synapse/" + get_version_string(synapse), database_engine=database_engine, ) diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 006bba80a8..19a682cce3 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -118,7 +118,7 @@ class MediaRepositoryServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 64df47f9cc..13cfbd08b0 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -128,7 +128,7 @@ class PusherServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 6808d6d3e0..82f06ea185 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -305,7 +305,7 @@ class SynchrotronServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/app/synctl.py b/synapse/app/synctl.py index 712dfa870e..56ae086128 100755 --- a/synapse/app/synctl.py +++ b/synapse/app/synctl.py @@ -171,6 +171,10 @@ def main(): if cache_factor: os.environ["SYNAPSE_CACHE_FACTOR"] = str(cache_factor) + cache_factors = config.get("synctl_cache_factors", {}) + for cache_name, factor in cache_factors.iteritems(): + os.environ["SYNAPSE_CACHE_FACTOR_" + cache_name.upper()] = str(factor) + worker_configfiles = [] if options.worker: start_stop_synapse = False diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index ada1c13cec..f5726e3df6 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -150,7 +150,7 @@ class UserDirectoryServer(HomeServer): elif listener["type"] == "metrics": if not self.get_config().enable_metrics: logger.warn(("Metrics listener configured, but " - "collect_metrics is not enabled!")) + "enable_metrics is not True!")) else: _base.listen_metrics(listener["bind_addresses"], listener["port"]) diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 5fdb579723..328cbfa284 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -85,7 +85,8 @@ class ApplicationService(object): NS_LIST = [NS_USERS, NS_ALIASES, NS_ROOMS] def __init__(self, token, hostname, url=None, namespaces=None, hs_token=None, - sender=None, id=None, protocols=None, rate_limited=True): + sender=None, id=None, protocols=None, rate_limited=True, + ip_range_whitelist=None): self.token = token self.url = url self.hs_token = hs_token @@ -93,6 +94,7 @@ class ApplicationService(object): self.server_name = hostname self.namespaces = self._check_namespaces(namespaces) self.id = id + self.ip_range_whitelist = ip_range_whitelist if "|" in self.id: raise Exception("application service ID cannot contain '|' character") @@ -292,4 +294,8 @@ class ApplicationService(object): return self.rate_limited def __str__(self): - return "ApplicationService: %s" % (self.__dict__,) + # copy dictionary and redact token fields so they don't get logged + dict_copy = self.__dict__.copy() + dict_copy["token"] = "<redacted>" + dict_copy["hs_token"] = "<redacted>" + return "ApplicationService: %s" % (dict_copy,) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 00efff1464..47251fb6ad 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -24,8 +24,27 @@ from synapse.types import ThirdPartyInstanceID import logging import urllib +from prometheus_client import Counter + logger = logging.getLogger(__name__) +sent_transactions_counter = Counter( + "synapse_appservice_api_sent_transactions", + "Number of /transactions/ requests sent", + ["service"] +) + +failed_transactions_counter = Counter( + "synapse_appservice_api_failed_transactions", + "Number of /transactions/ requests that failed to send", + ["service"] +) + +sent_events_counter = Counter( + "synapse_appservice_api_sent_events", + "Number of events sent to the AS", + ["service"] +) HOUR_IN_MS = 60 * 60 * 1000 @@ -219,12 +238,15 @@ class ApplicationServiceApi(SimpleHttpClient): args={ "access_token": service.hs_token }) + sent_transactions_counter.labels(service.id).inc() + sent_events_counter.labels(service.id).inc(len(events)) defer.returnValue(True) return except CodeMessageException as e: logger.warning("push_bulk to %s received %s", uri, e.code) except Exception as ex: logger.warning("push_bulk to %s threw exception %s", uri, ex) + failed_transactions_counter.labels(service.id).inc() defer.returnValue(False) def _serialize(self, events): diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index 277305e184..0c27bb2fa7 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -17,6 +17,8 @@ from ._base import Config, ConfigError from synapse.appservice import ApplicationService from synapse.types import UserID +from netaddr import IPSet + import yaml import logging @@ -154,6 +156,13 @@ def _load_appservice(hostname, as_info, config_filename): " will not receive events or queries.", config_filename, ) + + ip_range_whitelist = None + if as_info.get('ip_range_whitelist'): + ip_range_whitelist = IPSet( + as_info.get('ip_range_whitelist') + ) + return ApplicationService( token=as_info["as_token"], hostname=hostname, @@ -163,5 +172,6 @@ def _load_appservice(hostname, as_info, config_filename): sender=user_id, id=as_info["id"], protocols=protocols, - rate_limited=rate_limited + rate_limited=rate_limited, + ip_range_whitelist=ip_range_whitelist, ) diff --git a/synapse/config/consent_config.py b/synapse/config/consent_config.py index 8f6ed73328..e22c731aad 100644 --- a/synapse/config/consent_config.py +++ b/synapse/config/consent_config.py @@ -18,6 +18,9 @@ from ._base import Config DEFAULT_CONFIG = """\ # User Consent configuration # +# for detailed instructions, see +# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md +# # Parts of this section are required if enabling the 'consent' resource under # 'listeners', in particular 'template_dir' and 'version'. # diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 6a7228dc2f..557c270fbe 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -12,17 +12,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from ._base import Config -from synapse.util.logcontext import LoggingContextFilter -from twisted.logger import globalLogBeginner, STDLibLogObserver import logging import logging.config -import yaml -from string import Template import os import signal +from string import Template +import sys +from twisted.logger import STDLibLogObserver, globalLogBeginner +import yaml + +import synapse +from synapse.util.logcontext import LoggingContextFilter +from synapse.util.versionstring import get_version_string +from ._base import Config DEFAULT_LOG_CONFIG = Template(""" version: 1 @@ -202,6 +205,15 @@ def setup_logging(config, use_worker_options=False): if getattr(signal, "SIGHUP"): signal.signal(signal.SIGHUP, sighup) + # make sure that the first thing we log is a thing we can grep backwards + # for + logging.warn("***** STARTING SERVER *****") + logging.warn( + "Server %s version %s", + sys.argv[0], get_version_string(synapse), + ) + logging.info("Server hostname: %s", config.server_name) + # It's critical to point twisted's internal logging somewhere, otherwise it # stacks up and leaks kup to 64K object; # see: https://twistedmatrix.com/trac/ticket/8164 diff --git a/synapse/config/server.py b/synapse/config/server.py index 968ecd9ea0..71fd51e4bc 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -16,6 +16,7 @@ import logging +from synapse.http.endpoint import parse_and_validate_server_name from ._base import Config, ConfigError logger = logging.Logger(__name__) @@ -25,6 +26,12 @@ class ServerConfig(Config): def read_config(self, config): self.server_name = config["server_name"] + + try: + parse_and_validate_server_name(self.server_name) + except ValueError as e: + raise ConfigError(str(e)) + self.pid_file = self.abspath(config.get("pid_file")) self.web_client = config["web_client"] self.web_client_location = config.get("web_client_location", None) @@ -162,8 +169,8 @@ class ServerConfig(Config): }) def default_config(self, server_name, **kwargs): - if ":" in server_name: - bind_port = int(server_name.split(":")[1]) + _, bind_port = parse_and_validate_server_name(server_name) + if bind_port is not None: unsecure_port = bind_port - 400 else: bind_port = 8448 diff --git a/synapse/crypto/keyclient.py b/synapse/crypto/keyclient.py index f1fd488b90..2a0eddbea1 100644 --- a/synapse/crypto/keyclient.py +++ b/synapse/crypto/keyclient.py @@ -18,7 +18,7 @@ from twisted.web.http import HTTPClient from twisted.internet.protocol import Factory from twisted.internet import defer, reactor from synapse.http.endpoint import matrix_federation_endpoint -import simplejson as json +from canonicaljson import json import logging diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 22ee0fc93f..9b17ef0a08 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -27,10 +27,12 @@ from synapse.util.metrics import Measure from twisted.internet import defer from signedjson.sign import ( - verify_signed_json, signature_ids, sign_json, encode_canonical_json + verify_signed_json, signature_ids, sign_json, encode_canonical_json, + SignatureVerifyException, ) from signedjson.key import ( - is_signing_algorithm_supported, decode_verify_key_bytes + is_signing_algorithm_supported, decode_verify_key_bytes, + encode_verify_key_base64, ) from unpaddedbase64 import decode_base64, encode_base64 @@ -56,7 +58,7 @@ Attributes: key_ids(set(str)): The set of key_ids to that could be used to verify the JSON object json_object(dict): The JSON object to verify. - deferred(twisted.internet.defer.Deferred): + deferred(Deferred[str, str, nacl.signing.VerifyKey]): A deferred (server_name, key_id, verify_key) tuple that resolves when a verify key has been fetched. The deferreds' callbacks are run with no logcontext. @@ -736,6 +738,17 @@ class Keyring(object): @defer.inlineCallbacks def _handle_key_deferred(verify_request): + """Waits for the key to become available, and then performs a verification + + Args: + verify_request (VerifyKeyRequest): + + Returns: + Deferred[None] + + Raises: + SynapseError if there was a problem performing the verification + """ server_name = verify_request.server_name try: with PreserveLoggingContext(): @@ -768,11 +781,17 @@ def _handle_key_deferred(verify_request): )) try: verify_signed_json(json_object, server_name, verify_key) - except Exception: + except SignatureVerifyException as e: + logger.debug( + "Error verifying signature for %s:%s:%s with key %s: %s", + server_name, verify_key.alg, verify_key.version, + encode_verify_key_base64(verify_key), + str(e), + ) raise SynapseError( 401, - "Invalid signature for server %s with key %s:%s" % ( - server_name, verify_key.alg, verify_key.version + "Invalid signature for server %s with key %s:%s: %s" % ( + server_name, verify_key.alg, verify_key.version, str(e), ), Codes.UNAUTHORIZED, ) diff --git a/synapse/event_auth.py b/synapse/event_auth.py index f512d88145..cdf99fd140 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -76,6 +76,7 @@ def check(event, auth_events, do_sig_check=True, do_size_check=True): return if event.type == EventTypes.Create: + sender_domain = get_domain_from_id(event.sender) room_id_domain = get_domain_from_id(event.room_id) if room_id_domain != sender_domain: raise AuthError( @@ -524,7 +525,11 @@ def _check_power_levels(event, auth_events): "to your own" ) - if old_level > user_level or new_level > user_level: + # Check if the old and new levels are greater than the user level + # (if defined) + old_level_too_big = old_level is not None and old_level > user_level + new_level_too_big = new_level is not None and new_level > user_level + if old_level_too_big or new_level_too_big: raise AuthError( 403, "You don't have permission to add ops level greater " diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 2d420a58a2..591d0026bf 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -14,10 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import re -import simplejson as json +from canonicaljson import json +import six from twisted.internet import defer +from twisted.internet.abstract import isIPAddress +from synapse.api.constants import EventTypes from synapse.api.errors import AuthError, FederationError, SynapseError, NotFoundError from synapse.crypto.event_signing import compute_event_signature from synapse.federation.federation_base import ( @@ -27,6 +31,7 @@ from synapse.federation.federation_base import ( from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction +from synapse.http.endpoint import parse_server_name from synapse.types import get_domain_from_id from synapse.util import async from synapse.util.caches.response_cache import ResponseCache @@ -74,6 +79,9 @@ class FederationServer(FederationBase): @log_function def on_backfill_request(self, origin, room_id, versions, limit): with (yield self._server_linearizer.queue((origin, room_id))): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + pdus = yield self.handler.on_backfill_request( origin, room_id, versions, limit ) @@ -134,6 +142,8 @@ class FederationServer(FederationBase): received_pdus_counter.inc(len(transaction.pdus)) + origin_host, _ = parse_server_name(transaction.origin) + pdus_by_room = {} for p in transaction.pdus: @@ -154,9 +164,21 @@ class FederationServer(FederationBase): # we can process different rooms in parallel (which is useful if they # require callouts to other servers to fetch missing events), but # impose a limit to avoid going too crazy with ram/cpu. + @defer.inlineCallbacks def process_pdus_for_room(room_id): logger.debug("Processing PDUs for %s", room_id) + try: + yield self.check_server_matches_acl(origin_host, room_id) + except AuthError as e: + logger.warn( + "Ignoring PDUs for room %s from banned server", room_id, + ) + for pdu in pdus_by_room[room_id]: + event_id = pdu.event_id + pdu_results[event_id] = e.error_dict() + return + for pdu in pdus_by_room[room_id]: event_id = pdu.event_id try: @@ -211,6 +233,9 @@ class FederationServer(FederationBase): if not event_id: raise NotImplementedError("Specify an event") + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + in_room = yield self.auth.check_host_in_room(room_id, origin) if not in_room: raise AuthError(403, "Host not in room.") @@ -234,6 +259,9 @@ class FederationServer(FederationBase): if not event_id: raise NotImplementedError("Specify an event") + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + in_room = yield self.auth.check_host_in_room(room_id, origin) if not in_room: raise AuthError(403, "Host not in room.") @@ -277,7 +305,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function def on_pdu_request(self, origin, event_id): - pdu = yield self._get_persisted_pdu(origin, event_id) + pdu = yield self.handler.get_persisted_pdu(origin, event_id) if pdu: defer.returnValue( @@ -298,7 +326,9 @@ class FederationServer(FederationBase): defer.returnValue((200, resp)) @defer.inlineCallbacks - def on_make_join_request(self, room_id, user_id): + def on_make_join_request(self, origin, room_id, user_id): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) pdu = yield self.handler.on_make_join_request(room_id, user_id) time_now = self._clock.time_msec() defer.returnValue({"event": pdu.get_pdu_json(time_now)}) @@ -306,6 +336,8 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_invite_request(self, origin, content): pdu = event_from_pdu_json(content) + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, pdu.room_id) ret_pdu = yield self.handler.on_invite_request(origin, pdu) time_now = self._clock.time_msec() defer.returnValue((200, {"event": ret_pdu.get_pdu_json(time_now)})) @@ -314,6 +346,10 @@ class FederationServer(FederationBase): def on_send_join_request(self, origin, content): logger.debug("on_send_join_request: content: %s", content) pdu = event_from_pdu_json(content) + + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, pdu.room_id) + logger.debug("on_send_join_request: pdu sigs: %s", pdu.signatures) res_pdus = yield self.handler.on_send_join_request(origin, pdu) time_now = self._clock.time_msec() @@ -325,7 +361,9 @@ class FederationServer(FederationBase): })) @defer.inlineCallbacks - def on_make_leave_request(self, room_id, user_id): + def on_make_leave_request(self, origin, room_id, user_id): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) pdu = yield self.handler.on_make_leave_request(room_id, user_id) time_now = self._clock.time_msec() defer.returnValue({"event": pdu.get_pdu_json(time_now)}) @@ -334,6 +372,10 @@ class FederationServer(FederationBase): def on_send_leave_request(self, origin, content): logger.debug("on_send_leave_request: content: %s", content) pdu = event_from_pdu_json(content) + + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, pdu.room_id) + logger.debug("on_send_leave_request: pdu sigs: %s", pdu.signatures) yield self.handler.on_send_leave_request(origin, pdu) defer.returnValue((200, {})) @@ -341,6 +383,9 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_event_auth(self, origin, room_id, event_id): with (yield self._server_linearizer.queue((origin, room_id))): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + time_now = self._clock.time_msec() auth_pdus = yield self.handler.on_event_auth(event_id) res = { @@ -369,6 +414,9 @@ class FederationServer(FederationBase): Deferred: Results in `dict` with the same format as `content` """ with (yield self._server_linearizer.queue((origin, room_id))): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + auth_chain = [ event_from_pdu_json(e) for e in content["auth_chain"] @@ -442,6 +490,9 @@ class FederationServer(FederationBase): def on_get_missing_events(self, origin, room_id, earliest_events, latest_events, limit, min_depth): with (yield self._server_linearizer.queue((origin, room_id))): + origin_host, _ = parse_server_name(origin) + yield self.check_server_matches_acl(origin_host, room_id) + logger.info( "on_get_missing_events: earliest_events: %r, latest_events: %r," " limit: %d, min_depth: %d", @@ -470,17 +521,6 @@ class FederationServer(FederationBase): ts_now_ms = self._clock.time_msec() return self.store.get_user_id_for_open_id_token(token, ts_now_ms) - @log_function - def _get_persisted_pdu(self, origin, event_id, do_auth=True): - """ Get a PDU from the database with given origin and id. - - Returns: - Deferred: Results in a `Pdu`. - """ - return self.handler.get_persisted_pdu( - origin, event_id, do_auth=do_auth - ) - def _transaction_from_pdus(self, pdu_list): """Returns a new Transaction containing the given PDUs suitable for transmission. @@ -560,7 +600,9 @@ class FederationServer(FederationBase): affected=pdu.event_id, ) - yield self.handler.on_receive_pdu(origin, pdu, get_missing=True) + yield self.handler.on_receive_pdu( + origin, pdu, get_missing=True, sent_to_us_directly=True, + ) def __str__(self): return "<ReplicationLayer(%s)>" % self.server_name @@ -588,6 +630,101 @@ class FederationServer(FederationBase): ) defer.returnValue(ret) + @defer.inlineCallbacks + def check_server_matches_acl(self, server_name, room_id): + """Check if the given server is allowed by the server ACLs in the room + + Args: + server_name (str): name of server, *without any port part* + room_id (str): ID of the room to check + + Raises: + AuthError if the server does not match the ACL + """ + state_ids = yield self.store.get_current_state_ids(room_id) + acl_event_id = state_ids.get((EventTypes.ServerACL, "")) + + if not acl_event_id: + return + + acl_event = yield self.store.get_event(acl_event_id) + if server_matches_acl_event(server_name, acl_event): + return + + raise AuthError(code=403, msg="Server is banned from room") + + +def server_matches_acl_event(server_name, acl_event): + """Check if the given server is allowed by the ACL event + + Args: + server_name (str): name of server, without any port part + acl_event (EventBase): m.room.server_acl event + + Returns: + bool: True if this server is allowed by the ACLs + """ + logger.debug("Checking %s against acl %s", server_name, acl_event.content) + + # first of all, check if literal IPs are blocked, and if so, whether the + # server name is a literal IP + allow_ip_literals = acl_event.content.get("allow_ip_literals", True) + if not isinstance(allow_ip_literals, bool): + logger.warn("Ignorning non-bool allow_ip_literals flag") + allow_ip_literals = True + if not allow_ip_literals: + # check for ipv6 literals. These start with '['. + if server_name[0] == '[': + return False + + # check for ipv4 literals. We can just lift the routine from twisted. + if isIPAddress(server_name): + return False + + # next, check the deny list + deny = acl_event.content.get("deny", []) + if not isinstance(deny, (list, tuple)): + logger.warn("Ignorning non-list deny ACL %s", deny) + deny = [] + for e in deny: + if _acl_entry_matches(server_name, e): + # logger.info("%s matched deny rule %s", server_name, e) + return False + + # then the allow list. + allow = acl_event.content.get("allow", []) + if not isinstance(allow, (list, tuple)): + logger.warn("Ignorning non-list allow ACL %s", allow) + allow = [] + for e in allow: + if _acl_entry_matches(server_name, e): + # logger.info("%s matched allow rule %s", server_name, e) + return True + + # everything else should be rejected. + # logger.info("%s fell through", server_name) + return False + + +def _acl_entry_matches(server_name, acl_entry): + if not isinstance(acl_entry, six.string_types): + logger.warn("Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)) + return False + regex = _glob_to_regex(acl_entry) + return regex.match(server_name) + + +def _glob_to_regex(glob): + res = '' + for c in glob: + if c == '*': + res = res + '.*' + elif c == '?': + res = res + '.' + else: + res = res + re.escape(c) + return re.compile(res + "\\Z", re.IGNORECASE) + class FederationHandlerRegistry(object): """Allows classes to register themselves as handlers for a given EDU or diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 9f1142b5a9..1d5c0f3797 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -35,7 +35,7 @@ from synapse.storage.presence import UserPresenceState from synapse.util.metrics import Measure from synapse.metrics import LaterGauge -from blist import sorteddict +from sortedcontainers import SortedDict from collections import namedtuple import logging @@ -55,19 +55,19 @@ class FederationRemoteSendQueue(object): self.is_mine_id = hs.is_mine_id self.presence_map = {} # Pending presence map user_id -> UserPresenceState - self.presence_changed = sorteddict() # Stream position -> user_id + self.presence_changed = SortedDict() # Stream position -> user_id self.keyed_edu = {} # (destination, key) -> EDU - self.keyed_edu_changed = sorteddict() # stream position -> (destination, key) + self.keyed_edu_changed = SortedDict() # stream position -> (destination, key) - self.edus = sorteddict() # stream position -> Edu + self.edus = SortedDict() # stream position -> Edu - self.failures = sorteddict() # stream position -> (destination, Failure) + self.failures = SortedDict() # stream position -> (destination, Failure) - self.device_messages = sorteddict() # stream position -> destination + self.device_messages = SortedDict() # stream position -> destination self.pos = 1 - self.pos_time = sorteddict() + self.pos_time = SortedDict() # EVERYTHING IS SAD. In particular, python only makes new scopes when # we make a new function, so we need to make a new function so the inner @@ -98,7 +98,7 @@ class FederationRemoteSendQueue(object): now = self.clock.time_msec() keys = self.pos_time.keys() - time = keys.bisect_left(now - FIVE_MINUTES_AGO) + time = self.pos_time.bisect_left(now - FIVE_MINUTES_AGO) if not keys[:time]: return @@ -113,7 +113,7 @@ class FederationRemoteSendQueue(object): with Measure(self.clock, "send_queue._clear"): # Delete things out of presence maps keys = self.presence_changed.keys() - i = keys.bisect_left(position_to_delete) + i = self.presence_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.presence_changed[key] @@ -131,7 +131,7 @@ class FederationRemoteSendQueue(object): # Delete things out of keyed edus keys = self.keyed_edu_changed.keys() - i = keys.bisect_left(position_to_delete) + i = self.keyed_edu_changed.bisect_left(position_to_delete) for key in keys[:i]: del self.keyed_edu_changed[key] @@ -145,19 +145,19 @@ class FederationRemoteSendQueue(object): # Delete things out of edu map keys = self.edus.keys() - i = keys.bisect_left(position_to_delete) + i = self.edus.bisect_left(position_to_delete) for key in keys[:i]: del self.edus[key] # Delete things out of failure map keys = self.failures.keys() - i = keys.bisect_left(position_to_delete) + i = self.failures.bisect_left(position_to_delete) for key in keys[:i]: del self.failures[key] # Delete things out of device map keys = self.device_messages.keys() - i = keys.bisect_left(position_to_delete) + i = self.device_messages.bisect_left(position_to_delete) for key in keys[:i]: del self.device_messages[key] @@ -250,13 +250,12 @@ class FederationRemoteSendQueue(object): self._clear_queue_before_pos(federation_ack) # Fetch changed presence - keys = self.presence_changed.keys() - i = keys.bisect_right(from_token) - j = keys.bisect_right(to_token) + 1 + i = self.presence_changed.bisect_right(from_token) + j = self.presence_changed.bisect_right(to_token) + 1 dest_user_ids = [ (pos, user_id) - for pos in keys[i:j] - for user_id in self.presence_changed[pos] + for pos, user_id_list in self.presence_changed.items()[i:j] + for user_id in user_id_list ] for (key, user_id) in dest_user_ids: @@ -265,13 +264,12 @@ class FederationRemoteSendQueue(object): ))) # Fetch changes keyed edus - keys = self.keyed_edu_changed.keys() - i = keys.bisect_right(from_token) - j = keys.bisect_right(to_token) + 1 + i = self.keyed_edu_changed.bisect_right(from_token) + j = self.keyed_edu_changed.bisect_right(to_token) + 1 # We purposefully clobber based on the key here, python dict comprehensions # always use the last value, so this will correctly point to the last # stream position. - keyed_edus = {self.keyed_edu_changed[k]: k for k in keys[i:j]} + keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]} for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append((pos, KeyedEduRow( @@ -280,19 +278,17 @@ class FederationRemoteSendQueue(object): ))) # Fetch changed edus - keys = self.edus.keys() - i = keys.bisect_right(from_token) - j = keys.bisect_right(to_token) + 1 - edus = ((k, self.edus[k]) for k in keys[i:j]) + i = self.edus.bisect_right(from_token) + j = self.edus.bisect_right(to_token) + 1 + edus = self.edus.items()[i:j] for (pos, edu) in edus: rows.append((pos, EduRow(edu))) # Fetch changed failures - keys = self.failures.keys() - i = keys.bisect_right(from_token) - j = keys.bisect_right(to_token) + 1 - failures = ((k, self.failures[k]) for k in keys[i:j]) + i = self.failures.bisect_right(from_token) + j = self.failures.bisect_right(to_token) + 1 + failures = self.failures.items()[i:j] for (pos, (destination, failure)) in failures: rows.append((pos, FailureRow( @@ -301,10 +297,9 @@ class FederationRemoteSendQueue(object): ))) # Fetch changed device messages - keys = self.device_messages.keys() - i = keys.bisect_right(from_token) - j = keys.bisect_right(to_token) + 1 - device_messages = {self.device_messages[k]: k for k in keys[i:j]} + i = self.device_messages.bisect_right(from_token) + j = self.device_messages.bisect_right(to_token) + 1 + device_messages = {v: k for k, v in self.device_messages.items()[i:j]} for (destination, pos) in iteritems(device_messages): rows.append((pos, DeviceRow( diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index f0aeb5a0d3..d72b057e28 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -21,7 +21,6 @@ from .units import Transaction, Edu from synapse.api.errors import HttpResponseException, FederationDeniedError from synapse.util import logcontext, PreserveLoggingContext -from synapse.util.async import run_on_reactor from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.util.metrics import measure_func from synapse.handlers.presence import format_user_presence_state, get_interested_remotes @@ -42,8 +41,11 @@ import logging logger = logging.getLogger(__name__) -sent_pdus_destination_dist = Counter( - "synapse_federation_transaction_queue_sent_pdu_destinations", "" +sent_pdus_destination_dist_count = Counter( + "synapse_federation_client_sent_pdu_destinations:count", "" +) +sent_pdus_destination_dist_total = Counter( + "synapse_federation_client_sent_pdu_destinations:total", "" ) @@ -280,7 +282,8 @@ class TransactionQueue(object): if not destinations: return - sent_pdus_destination_dist.inc(len(destinations)) + sent_pdus_destination_dist_total.inc(len(destinations)) + sent_pdus_destination_dist_count.inc() for destination in destinations: self.pending_pdus_by_dest.setdefault(destination, []).append( @@ -451,9 +454,6 @@ class TransactionQueue(object): # hence why we throw the result away. yield get_retry_limiter(destination, self.clock, self.store) - # XXX: what's this for? - yield run_on_reactor() - pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 19d09f5422..c6d98d35cb 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -18,6 +18,7 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX from synapse.api.errors import Codes, SynapseError, FederationDeniedError +from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.server import JsonResource from synapse.http.servlet import ( parse_json_object_from_request, parse_integer_from_args, parse_string_from_args, @@ -99,26 +100,6 @@ class Authenticator(object): origin = None - def parse_auth_header(header_str): - try: - params = auth.split(" ")[1].split(",") - param_dict = dict(kv.split("=") for kv in params) - - def strip_quotes(value): - if value.startswith("\""): - return value[1:-1] - else: - return value - - origin = strip_quotes(param_dict["origin"]) - key = strip_quotes(param_dict["key"]) - sig = strip_quotes(param_dict["sig"]) - return (origin, key, sig) - except Exception: - raise AuthenticationError( - 400, "Malformed Authorization header", Codes.UNAUTHORIZED - ) - auth_headers = request.requestHeaders.getRawHeaders(b"Authorization") if not auth_headers: @@ -127,8 +108,8 @@ class Authenticator(object): ) for auth in auth_headers: - if auth.startswith("X-Matrix"): - (origin, key, sig) = parse_auth_header(auth) + if auth.startswith(b"X-Matrix"): + (origin, key, sig) = _parse_auth_header(auth) json_request["origin"] = origin json_request["signatures"].setdefault(origin, {})[key] = sig @@ -165,6 +146,48 @@ class Authenticator(object): logger.exception("Error resetting retry timings on %s", origin) +def _parse_auth_header(header_bytes): + """Parse an X-Matrix auth header + + Args: + header_bytes (bytes): header value + + Returns: + Tuple[str, str, str]: origin, key id, signature. + + Raises: + AuthenticationError if the header could not be parsed + """ + try: + header_str = header_bytes.decode('utf-8') + params = header_str.split(" ")[1].split(",") + param_dict = dict(kv.split("=") for kv in params) + + def strip_quotes(value): + if value.startswith(b"\""): + return value[1:-1] + else: + return value + + origin = strip_quotes(param_dict["origin"]) + + # ensure that the origin is a valid server name + parse_and_validate_server_name(origin) + + key = strip_quotes(param_dict["key"]) + sig = strip_quotes(param_dict["sig"]) + return origin, key, sig + except Exception as e: + logger.warn( + "Error parsing auth header '%s': %s", + header_bytes.decode('ascii', 'replace'), + e, + ) + raise AuthenticationError( + 400, "Malformed Authorization header", Codes.UNAUTHORIZED, + ) + + class BaseFederationServlet(object): REQUIRE_AUTH = True @@ -362,7 +385,9 @@ class FederationMakeJoinServlet(BaseFederationServlet): @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): - content = yield self.handler.on_make_join_request(context, user_id) + content = yield self.handler.on_make_join_request( + origin, context, user_id, + ) defer.returnValue((200, content)) @@ -371,7 +396,9 @@ class FederationMakeLeaveServlet(BaseFederationServlet): @defer.inlineCallbacks def on_GET(self, origin, content, query, context, user_id): - content = yield self.handler.on_make_leave_request(context, user_id) + content = yield self.handler.on_make_leave_request( + origin, context, user_id, + ) defer.returnValue((200, content)) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 3c0051586d..cbef1f2770 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -13,8 +13,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from twisted.internet import defer, threads +from canonicaljson import json + from ._base import BaseHandler from synapse.api.constants import LoginType from synapse.api.errors import ( @@ -23,7 +26,6 @@ from synapse.api.errors import ( ) from synapse.module_api import ModuleApi from synapse.types import UserID -from synapse.util.async import run_on_reactor from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable @@ -32,7 +34,7 @@ from twisted.web.client import PartialDownloadError import logging import bcrypt import pymacaroons -import simplejson +import attr import synapse.util.stringutils as stringutils @@ -402,7 +404,7 @@ class AuthHandler(BaseHandler): except PartialDownloadError as pde: # Twisted is silly data = pde.response - resp_body = simplejson.loads(data) + resp_body = json.loads(data) if 'success' in resp_body: # Note that we do NOT check the hostname here: we explicitly @@ -423,15 +425,11 @@ class AuthHandler(BaseHandler): def _check_msisdn(self, authdict, _): return self._check_threepid('msisdn', authdict) - @defer.inlineCallbacks def _check_dummy_auth(self, authdict, _): - yield run_on_reactor() - defer.returnValue(True) + return defer.succeed(True) @defer.inlineCallbacks def _check_threepid(self, medium, authdict): - yield run_on_reactor() - if 'threepid_creds' not in authdict: raise LoginError(400, "Missing threepid_creds", Codes.MISSING_PARAM) @@ -825,6 +823,15 @@ class AuthHandler(BaseHandler): if medium == 'email': address = address.lower() + identity_handler = self.hs.get_handlers().identity_handler + yield identity_handler.unbind_threepid( + user_id, + { + 'medium': medium, + 'address': address, + }, + ) + ret = yield self.store.user_delete_threepid( user_id, medium, address, ) @@ -849,7 +856,11 @@ class AuthHandler(BaseHandler): return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, bcrypt.gensalt(self.bcrypt_rounds)) - return make_deferred_yieldable(threads.deferToThread(_do_hash)) + return make_deferred_yieldable( + threads.deferToThreadPool( + self.hs.get_reactor(), self.hs.get_reactor().getThreadPool(), _do_hash + ), + ) def validate_hash(self, password, stored_hash): """Validates that self.hash(password) == stored_hash. @@ -869,16 +880,21 @@ class AuthHandler(BaseHandler): ) if stored_hash: - return make_deferred_yieldable(threads.deferToThread(_do_validate_hash)) + return make_deferred_yieldable( + threads.deferToThreadPool( + self.hs.get_reactor(), + self.hs.get_reactor().getThreadPool(), + _do_validate_hash, + ), + ) else: return defer.succeed(False) -class MacaroonGeneartor(object): - def __init__(self, hs): - self.clock = hs.get_clock() - self.server_name = hs.config.server_name - self.macaroon_secret_key = hs.config.macaroon_secret_key +@attr.s +class MacaroonGenerator(object): + + hs = attr.ib() def generate_access_token(self, user_id, extra_caveats=None): extra_caveats = extra_caveats or [] @@ -896,7 +912,7 @@ class MacaroonGeneartor(object): def generate_short_term_login_token(self, user_id, duration_in_ms=(2 * 60 * 1000)): macaroon = self._generate_base_macaroon(user_id) macaroon.add_first_party_caveat("type = login") - now = self.clock.time_msec() + now = self.hs.get_clock().time_msec() expiry = now + duration_in_ms macaroon.add_first_party_caveat("time < %d" % (expiry,)) return macaroon.serialize() @@ -908,9 +924,9 @@ class MacaroonGeneartor(object): def _generate_base_macaroon(self, user_id): macaroon = pymacaroons.Macaroon( - location=self.server_name, + location=self.hs.config.server_name, identifier="key", - key=self.macaroon_secret_key) + key=self.hs.config.macaroon_secret_key) macaroon.add_first_party_caveat("gen = 1") macaroon.add_first_party_caveat("user_id = %s" % (user_id,)) return macaroon diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index c5e92f6214..a84b7b8b80 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -12,11 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, reactor +from twisted.internet import defer from ._base import BaseHandler from synapse.types import UserID, create_requester from synapse.util.logcontext import run_in_background +from synapse.api.errors import SynapseError import logging @@ -30,6 +31,7 @@ class DeactivateAccountHandler(BaseHandler): self._auth_handler = hs.get_auth_handler() self._device_handler = hs.get_device_handler() self._room_member_handler = hs.get_room_member_handler() + self._identity_handler = hs.get_handlers().identity_handler self.user_directory_handler = hs.get_user_directory_handler() # Flag that indicates whether the process to part users from rooms is running @@ -37,14 +39,15 @@ class DeactivateAccountHandler(BaseHandler): # Start the user parter loop so it can resume parting users from rooms where # it left off (if it has work left to do). - reactor.callWhenRunning(self._start_user_parting) + hs.get_reactor().callWhenRunning(self._start_user_parting) @defer.inlineCallbacks - def deactivate_account(self, user_id): + def deactivate_account(self, user_id, erase_data): """Deactivate a user's account Args: user_id (str): ID of user to be deactivated + erase_data (bool): whether to GDPR-erase the user's data Returns: Deferred @@ -52,14 +55,35 @@ class DeactivateAccountHandler(BaseHandler): # FIXME: Theoretically there is a race here wherein user resets # password using threepid. - # first delete any devices belonging to the user, which will also + # delete threepids first. We remove these from the IS so if this fails, + # leave the user still active so they can try again. + # Ideally we would prevent password resets and then do this in the + # background thread. + threepids = yield self.store.user_get_threepids(user_id) + for threepid in threepids: + try: + yield self._identity_handler.unbind_threepid( + user_id, + { + 'medium': threepid['medium'], + 'address': threepid['address'], + }, + ) + except Exception: + # Do we want this to be a fatal error or should we carry on? + logger.exception("Failed to remove threepid from ID server") + raise SynapseError(400, "Failed to remove threepid from ID server") + yield self.store.user_delete_threepid( + user_id, threepid['medium'], threepid['address'], + ) + + # delete any devices belonging to the user, which will also # delete corresponding access tokens. yield self._device_handler.delete_all_devices_for_user(user_id) # then delete any remaining access tokens which weren't associated with # a device. yield self._auth_handler.delete_access_tokens_for_user(user_id) - yield self.store.user_delete_threepids(user_id) yield self.store.user_set_password_hash(user_id, None) # Add the user to a table of users pending deactivation (ie. @@ -69,6 +93,11 @@ class DeactivateAccountHandler(BaseHandler): # delete from user directory yield self.user_directory_handler.handle_user_deactivated(user_id) + # Mark the user as erased, if they asked for that + if erase_data: + logger.info("Marking %s as erased", user_id) + yield self.store.mark_user_erased(user_id) + # Now start the process that goes through that list and # parts users from rooms (if it isn't already running) self._start_user_parting() diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 4d7bf5defd..62b4892a4e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import simplejson as json import logging -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json from twisted.internet import defer from six import iteritems @@ -357,7 +356,7 @@ def _exception_to_failure(e): # include ConnectionRefused and other errors # # Note that some Exceptions (notably twisted's ResponseFailed etc) don't - # give a string for e.message, which simplejson then fails to serialize. + # give a string for e.message, which json then fails to serialize. return { "status": 503, "message": str(e.message), } diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 495ac4c648..13117d70fe 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -39,11 +39,12 @@ from synapse.events.validator import EventValidator from synapse.util import unwrapFirstError, logcontext from synapse.util.metrics import measure_func from synapse.util.logutils import log_function -from synapse.util.async import run_on_reactor, Linearizer +from synapse.util.async import Linearizer from synapse.util.frozenutils import unfreeze from synapse.crypto.event_signing import ( compute_event_signature, add_hashes_and_signatures, ) +from synapse.state import resolve_events_with_factory from synapse.types import UserID, get_domain_from_id from synapse.events.utils import prune_event @@ -89,7 +90,9 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def on_receive_pdu(self, origin, pdu, get_missing=True): + def on_receive_pdu( + self, origin, pdu, get_missing=True, sent_to_us_directly=False, + ): """ Process a PDU received via a federation /send/ transaction, or via backfill of missing prev_events @@ -103,8 +106,10 @@ class FederationHandler(BaseHandler): """ # We reprocess pdus when we have seen them only as outliers - existing = yield self.get_persisted_pdu( - origin, pdu.event_id, do_auth=False + existing = yield self.store.get_event( + pdu.event_id, + allow_none=True, + allow_rejected=True, ) # FIXME: Currently we fetch an event again when we already have it @@ -161,14 +166,11 @@ class FederationHandler(BaseHandler): "Ignoring PDU %s for room %s from %s as we've left the room!", pdu.event_id, pdu.room_id, origin, ) - return + defer.returnValue(None) state = None - auth_chain = [] - fetch_state = False - # Get missing pdus if necessary. if not pdu.internal_metadata.is_outlier(): # We only backfill backwards to the min depth. @@ -223,26 +225,60 @@ class FederationHandler(BaseHandler): list(prevs - seen)[:5], ) - if prevs - seen: - logger.info( - "Still missing %d events for room %r: %r...", - len(prevs - seen), pdu.room_id, list(prevs - seen)[:5] + if sent_to_us_directly and prevs - seen: + # If they have sent it to us directly, and the server + # isn't telling us about the auth events that it's + # made a message referencing, we explode + raise FederationError( + "ERROR", + 403, + ( + "Your server isn't divulging details about prev_events " + "referenced in this event." + ), + affected=pdu.event_id, ) - fetch_state = True + elif prevs - seen: + # Calculate the state of the previous events, and + # de-conflict them to find the current state. + state_groups = [] + auth_chains = set() + try: + # Get the state of the events we know about + ours = yield self.store.get_state_groups(pdu.room_id, list(seen)) + state_groups.append(ours) - if fetch_state: - # We need to get the state at this event, since we haven't - # processed all the prev events. - logger.debug( - "_handle_new_pdu getting state for %s", - pdu.room_id - ) - try: - state, auth_chain = yield self.replication_layer.get_state_for_room( - origin, pdu.room_id, pdu.event_id, - ) - except Exception: - logger.exception("Failed to get state for event: %s", pdu.event_id) + # Ask the remote server for the states we don't + # know about + for p in prevs - seen: + state, got_auth_chain = ( + yield self.replication_layer.get_state_for_room( + origin, pdu.room_id, p + ) + ) + auth_chains.update(got_auth_chain) + state_group = {(x.type, x.state_key): x.event_id for x in state} + state_groups.append(state_group) + + # Resolve any conflicting state + def fetch(ev_ids): + return self.store.get_events( + ev_ids, get_prev_content=False, check_redacted=False + ) + + state_map = yield resolve_events_with_factory( + state_groups, {pdu.event_id: pdu}, fetch + ) + + state = (yield self.store.get_events(state_map.values())).values() + auth_chain = list(auth_chains) + except Exception: + raise FederationError( + "ERROR", + 403, + "We can't get valid state history.", + affected=pdu.event_id, + ) yield self._process_received_pdu( origin, @@ -320,11 +356,17 @@ class FederationHandler(BaseHandler): for e in missing_events: logger.info("Handling found event %s", e.event_id) - yield self.on_receive_pdu( - origin, - e, - get_missing=False - ) + try: + yield self.on_receive_pdu( + origin, + e, + get_missing=False + ) + except FederationError as e: + if e.code == 403: + logger.warn("Event %s failed history check.") + else: + raise @log_function @defer.inlineCallbacks @@ -458,6 +500,47 @@ class FederationHandler(BaseHandler): @measure_func("_filter_events_for_server") @defer.inlineCallbacks def _filter_events_for_server(self, server_name, room_id, events): + """Filter the given events for the given server, redacting those the + server can't see. + + Assumes the server is currently in the room. + + Returns + list[FrozenEvent] + """ + # First lets check to see if all the events have a history visibility + # of "shared" or "world_readable". If thats the case then we don't + # need to check membership (as we know the server is in the room). + event_to_state_ids = yield self.store.get_state_ids_for_events( + frozenset(e.event_id for e in events), + types=( + (EventTypes.RoomHistoryVisibility, ""), + ) + ) + + visibility_ids = set() + for sids in event_to_state_ids.itervalues(): + hist = sids.get((EventTypes.RoomHistoryVisibility, "")) + if hist: + visibility_ids.add(hist) + + # If we failed to find any history visibility events then the default + # is "shared" visiblity. + if not visibility_ids: + defer.returnValue(events) + + event_map = yield self.store.get_events(visibility_ids) + all_open = all( + e.content.get("history_visibility") in (None, "shared", "world_readable") + for e in event_map.itervalues() + ) + + if all_open: + defer.returnValue(events) + + # Ok, so we're dealing with events that have non-trivial visibility + # rules, so we need to also get the memberships of the room. + event_to_state_ids = yield self.store.get_state_ids_for_events( frozenset(e.event_id for e in events), types=( @@ -493,7 +576,20 @@ class FederationHandler(BaseHandler): for e_id, key_to_eid in event_to_state_ids.iteritems() } + erased_senders = yield self.store.are_users_erased( + e.sender for e in events, + ) + def redact_disallowed(event, state): + # if the sender has been gdpr17ed, always return a redacted + # copy of the event. + if erased_senders[event.sender]: + logger.info( + "Sender of %s has been erased, redacting", + event.event_id, + ) + return prune_event(event) + if not state: return event @@ -1381,8 +1477,6 @@ class FederationHandler(BaseHandler): def get_state_for_pdu(self, room_id, event_id): """Returns the state at the event. i.e. not including said event. """ - yield run_on_reactor() - state_groups = yield self.store.get_state_groups( room_id, [event_id] ) @@ -1425,8 +1519,6 @@ class FederationHandler(BaseHandler): def get_state_ids_for_pdu(self, room_id, event_id): """Returns the state at the event. i.e. not including said event. """ - yield run_on_reactor() - state_groups = yield self.store.get_state_groups_ids( room_id, [event_id] ) @@ -1468,11 +1560,20 @@ class FederationHandler(BaseHandler): @defer.inlineCallbacks @log_function - def get_persisted_pdu(self, origin, event_id, do_auth=True): - """ Get a PDU from the database with given origin and id. + def get_persisted_pdu(self, origin, event_id): + """Get an event from the database for the given server. + + Args: + origin [str]: hostname of server which is requesting the event; we + will check that the server is allowed to see it. + event_id [str]: id of the event being requested Returns: - Deferred: Results in a `Pdu`. + Deferred[EventBase|None]: None if we know nothing about the event; + otherwise the (possibly-redacted) event. + + Raises: + AuthError if the server is not currently in the room """ event = yield self.store.get_event( event_id, @@ -1493,20 +1594,17 @@ class FederationHandler(BaseHandler): ) ) - if do_auth: - in_room = yield self.auth.check_host_in_room( - event.room_id, - origin - ) - if not in_room: - raise AuthError(403, "Host not in room.") - - events = yield self._filter_events_for_server( - origin, event.room_id, [event] - ) - - event = events[0] + in_room = yield self.auth.check_host_in_room( + event.room_id, + origin + ) + if not in_room: + raise AuthError(403, "Host not in room.") + events = yield self._filter_events_for_server( + origin, event.room_id, [event] + ) + event = events[0] defer.returnValue(event) else: defer.returnValue(None) diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 91a0898860..277c2b7760 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2017 Vector Creations Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +19,7 @@ import logging -import simplejson as json +from canonicaljson import json from twisted.internet import defer @@ -26,7 +27,6 @@ from synapse.api.errors import ( MatrixCodeMessageException, CodeMessageException ) from ._base import BaseHandler -from synapse.util.async import run_on_reactor from synapse.api.errors import SynapseError, Codes logger = logging.getLogger(__name__) @@ -38,6 +38,7 @@ class IdentityHandler(BaseHandler): super(IdentityHandler, self).__init__(hs) self.http_client = hs.get_simple_http_client() + self.federation_http_client = hs.get_http_client() self.trusted_id_servers = set(hs.config.trusted_third_party_id_servers) self.trust_any_id_server_just_for_testing_do_not_use = ( @@ -60,8 +61,6 @@ class IdentityHandler(BaseHandler): @defer.inlineCallbacks def threepid_from_creds(self, creds): - yield run_on_reactor() - if 'id_server' in creds: id_server = creds['id_server'] elif 'idServer' in creds: @@ -104,7 +103,6 @@ class IdentityHandler(BaseHandler): @defer.inlineCallbacks def bind_threepid(self, creds, mxid): - yield run_on_reactor() logger.debug("binding threepid %r to %s", creds, mxid) data = None @@ -139,9 +137,53 @@ class IdentityHandler(BaseHandler): defer.returnValue(data) @defer.inlineCallbacks - def requestEmailToken(self, id_server, email, client_secret, send_attempt, **kwargs): - yield run_on_reactor() + def unbind_threepid(self, mxid, threepid): + """ + Removes a binding from an identity server + Args: + mxid (str): Matrix user ID of binding to be removed + threepid (dict): Dict with medium & address of binding to be removed + Returns: + Deferred[bool]: True on success, otherwise False + """ + logger.debug("unbinding threepid %r from %s", threepid, mxid) + if not self.trusted_id_servers: + logger.warn("Can't unbind threepid: no trusted ID servers set in config") + defer.returnValue(False) + + # We don't track what ID server we added 3pids on (perhaps we ought to) + # but we assume that any of the servers in the trusted list are in the + # same ID server federation, so we can pick any one of them to send the + # deletion request to. + id_server = next(iter(self.trusted_id_servers)) + + url = "https://%s/_matrix/identity/api/v1/3pid/unbind" % (id_server,) + content = { + "mxid": mxid, + "threepid": threepid, + } + headers = {} + # we abuse the federation http client to sign the request, but we have to send it + # using the normal http client since we don't want the SRV lookup and want normal + # 'browser-like' HTTPS. + self.federation_http_client.sign_request( + destination=None, + method='POST', + url_bytes='/_matrix/identity/api/v1/3pid/unbind'.encode('ascii'), + headers_dict=headers, + content=content, + destination_is=id_server, + ) + yield self.http_client.post_json_get_json( + url, + content, + headers, + ) + defer.returnValue(True) + + @defer.inlineCallbacks + def requestEmailToken(self, id_server, email, client_secret, send_attempt, **kwargs): if not self._should_trust_id_server(id_server): raise SynapseError( 400, "Untrusted ID server '%s'" % id_server, @@ -176,8 +218,6 @@ class IdentityHandler(BaseHandler): self, id_server, country, phone_number, client_secret, send_attempt, **kwargs ): - yield run_on_reactor() - if not self._should_trust_id_server(id_server): raise SynapseError( 400, "Untrusted ID server '%s'" % id_server, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1cb81b6cf8..cbadf3c88e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -14,13 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import simplejson import sys -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json import six from six import string_types, itervalues, iteritems -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.defer import succeed from twisted.python.failure import Failure @@ -36,7 +35,7 @@ from synapse.events.validator import EventValidator from synapse.types import ( UserID, RoomAlias, RoomStreamToken, ) -from synapse.util.async import run_on_reactor, ReadWriteLock, Limiter +from synapse.util.async import ReadWriteLock, Limiter from synapse.util.logcontext import run_in_background from synapse.util.metrics import measure_func from synapse.util.frozenutils import frozendict_json_encoder @@ -157,7 +156,7 @@ class MessageHandler(BaseHandler): # remove the purge from the list 24 hours after it completes def clear_purge(): del self._purges_by_id[purge_id] - reactor.callLater(24 * 3600, clear_purge) + self.hs.get_reactor().callLater(24 * 3600, clear_purge) def get_purge_status(self, purge_id): """Get the current status of an active purge @@ -491,7 +490,7 @@ class EventCreationHandler(object): target, e ) - is_exempt = yield self._is_exempt_from_privacy_policy(builder) + is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) if not is_exempt: yield self.assert_accepted_privacy_policy(requester) @@ -509,12 +508,13 @@ class EventCreationHandler(object): defer.returnValue((event, context)) - def _is_exempt_from_privacy_policy(self, builder): + def _is_exempt_from_privacy_policy(self, builder, requester): """"Determine if an event to be sent is exempt from having to consent to the privacy policy Args: builder (synapse.events.builder.EventBuilder): event being created + requester (Requster): user requesting this event Returns: Deferred[bool]: true if the event can be sent without the user @@ -525,6 +525,9 @@ class EventCreationHandler(object): membership = builder.content.get("membership", None) if membership == Membership.JOIN: return self._is_server_notices_room(builder.room_id) + elif membership == Membership.LEAVE: + # the user is always allowed to leave (but not kick people) + return builder.state_key == requester.user.to_string() return succeed(False) @defer.inlineCallbacks @@ -793,7 +796,7 @@ class EventCreationHandler(object): # Ensure that we can round trip before trying to persist in db try: dump = frozendict_json_encoder.encode(event.content) - simplejson.loads(dump) + json.loads(dump) except Exception: logger.exception("Failed to encode content: %r", event.content) raise @@ -806,6 +809,7 @@ class EventCreationHandler(object): # If we're a worker we need to hit out to the master. if self.config.worker_app: yield send_event_to_master( + self.hs.get_clock(), self.http_client, host=self.config.worker_replication_host, port=self.config.worker_replication_http_port, @@ -959,9 +963,7 @@ class EventCreationHandler(object): event_stream_id, max_stream_id ) - @defer.inlineCallbacks def _notify(): - yield run_on_reactor() try: self.notifier.on_new_room_event( event, event_stream_id, max_stream_id, diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 7fe568132f..7db59fba00 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -22,7 +22,7 @@ The methods that define policy are: - should_notify """ -from twisted.internet import defer, reactor +from twisted.internet import defer from contextlib import contextmanager from six import itervalues, iteritems @@ -179,7 +179,7 @@ class PresenceHandler(object): # have not yet been persisted self.unpersisted_users_changes = set() - reactor.addSystemEventTrigger("before", "shutdown", self._on_shutdown) + hs.get_reactor().addSystemEventTrigger("before", "shutdown", self._on_shutdown) self.serial_to_user = {} self._next_serial = 1 diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index ad9bb0a3cd..dcf5d346a1 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,7 +24,7 @@ from synapse.api.errors import ( from synapse.http.client import CaptchaServerHttpClient from synapse import types from synapse.types import UserID, create_requester, RoomID, RoomAlias -from synapse.util.async import run_on_reactor, Linearizer +from synapse.util.async import Linearizer from synapse.util.threepids import check_3pid_allowed from ._base import BaseHandler @@ -141,7 +141,6 @@ class RegistrationHandler(BaseHandler): Raises: RegistrationError if there was a problem registering. """ - yield run_on_reactor() password_hash = None if password: password_hash = yield self.auth_handler().hash(password) @@ -453,8 +452,6 @@ class RegistrationHandler(BaseHandler): Raises: RegistrationError if there was a problem registering. """ - yield run_on_reactor() - if localpart is None: raise SynapseError(400, "Request must include user id") diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index f80311c11f..566a5d1ebf 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -23,7 +23,7 @@ from synapse.types import UserID, RoomAlias, RoomID, RoomStreamToken from synapse.api.constants import ( EventTypes, JoinRules, RoomCreationPreset ) -from synapse.api.errors import AuthError, StoreError, SynapseError +from synapse.api.errors import AuthError, Codes, StoreError, SynapseError from synapse.util import stringutils from synapse.visibility import filter_events_for_client @@ -117,7 +117,11 @@ class RoomCreationHandler(BaseHandler): ) if mapping: - raise SynapseError(400, "Room alias already taken") + raise SynapseError( + 400, + "Room alias already taken", + Codes.ROOM_IN_USE + ) else: room_alias = None diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 1eca26aa1e..2e3a77ca4b 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -64,6 +64,13 @@ class SearchHandler(BaseHandler): except Exception: raise SynapseError(400, "Invalid batch") + logger.info( + "Search batch properties: %r, %r, %r", + batch_group, batch_group_key, batch_token, + ) + + logger.info("Search content: %s", content) + try: room_cat = content["search_categories"]["room_events"] @@ -271,6 +278,8 @@ class SearchHandler(BaseHandler): # We should never get here due to the guard earlier. raise NotImplementedError() + logger.info("Found %d events to return", len(allowed_events)) + # If client has asked for "context" for each event (i.e. some surrounding # events and state), fetch that if event_context is not None: @@ -282,6 +291,11 @@ class SearchHandler(BaseHandler): event.room_id, event.event_id, before_limit, after_limit ) + logger.info( + "Context for search returned %d and %d events", + len(res["events_before"]), len(res["events_after"]), + ) + res["events_before"] = yield filter_events_for_client( self.store, user.to_string(), res["events_before"] ) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index a39f0f7343..7e4a114d4f 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -19,7 +19,6 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules, Membership from synapse.storage.roommember import ProfileInfo from synapse.util.metrics import Measure -from synapse.util.async import sleep from synapse.types import get_localpart_from_id from six import iteritems @@ -174,7 +173,7 @@ class UserDirectoryHandler(object): logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) yield self._handle_initial_room(room_id) num_processed_rooms += 1 - yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) logger.info("Processed all rooms.") @@ -188,7 +187,7 @@ class UserDirectoryHandler(object): logger.info("Handling user %d/%d", num_processed_users + 1, len(user_ids)) yield self._handle_local_user(user_id) num_processed_users += 1 - yield sleep(self.INITIAL_USER_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.) logger.info("Processed all users") @@ -236,7 +235,7 @@ class UserDirectoryHandler(object): count = 0 for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) if not self.is_mine_id(user_id): count += 1 @@ -251,7 +250,7 @@ class UserDirectoryHandler(object): continue if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.) count += 1 user_set = (user_id, other_user_id) diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 054372e179..58ef8d3ce4 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -13,6 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re + from twisted.internet.defer import CancelledError from twisted.python import failure @@ -34,3 +36,14 @@ def cancelled_to_request_timed_out_error(value, timeout): value.trap(CancelledError) raise RequestTimedOutError() return value + + +ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') + + +def redact_uri(uri): + """Strips access tokens from the uri replaces with <redacted>""" + return ACCESS_TOKEN_RE.sub( + br'\1<redacted>\3', + uri + ) diff --git a/synapse/http/client.py b/synapse/http/client.py index 4d4eee3d64..5bdc484c15 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -19,7 +19,7 @@ from OpenSSL.SSL import VERIFY_NONE from synapse.api.errors import ( CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, ) -from synapse.http import cancelled_to_request_timed_out_error +from synapse.http import cancelled_to_request_timed_out_error, redact_uri from synapse.util.async import add_timeout_to_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable @@ -42,7 +42,7 @@ from twisted.web._newclient import ResponseDone from six import StringIO from prometheus_client import Counter -import simplejson as json +from canonicaljson import json import logging import urllib @@ -90,31 +90,32 @@ class SimpleHttpClient(object): # counters to it outgoing_requests_counter.labels(method).inc() - logger.info("Sending request %s %s", method, uri) + # log request but strip `access_token` (AS requests for example include this) + logger.info("Sending request %s %s", method, redact_uri(uri)) try: request_deferred = self.agent.request( method, uri, *args, **kwargs ) add_timeout_to_deferred( - request_deferred, - 60, cancelled_to_request_timed_out_error, + request_deferred, 60, self.hs.get_reactor(), + cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable(request_deferred) incoming_responses_counter.labels(method, response.code).inc() logger.info( "Received response to %s %s: %s", - method, uri, response.code + method, redact_uri(uri), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, uri, type(e).__name__, e.message + method, redact_uri(uri), type(e).__name__, e.message ) - raise e + raise @defer.inlineCallbacks def post_urlencoded_get_json(self, uri, args={}, headers=None): diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index 87a482650d..1b1123b292 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -12,8 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re + from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.error import ConnectError from twisted.names import client, dns from twisted.names.error import DNSNameError, DomainError @@ -38,6 +40,71 @@ _Server = collections.namedtuple( ) +def parse_server_name(server_name): + """Split a server name into host/port parts. + + Args: + server_name (str): server name to parse + + Returns: + Tuple[str, int|None]: host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + try: + if server_name[-1] == ']': + # ipv6 literal, hopefully + return server_name, None + + domain_port = server_name.rsplit(":", 1) + domain = domain_port[0] + port = int(domain_port[1]) if domain_port[1:] else None + return domain, port + except Exception: + raise ValueError("Invalid server name '%s'" % server_name) + + +VALID_HOST_REGEX = re.compile( + "\\A[0-9a-zA-Z.-]+\\Z", +) + + +def parse_and_validate_server_name(server_name): + """Split a server name into host/port parts and do some basic validation. + + Args: + server_name (str): server name to parse + + Returns: + Tuple[str, int|None]: host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + host, port = parse_server_name(server_name) + + # these tests don't need to be bulletproof as we'll find out soon enough + # if somebody is giving us invalid data. What we *do* need is to be sure + # that nobody is sneaking IP literals in that look like hostnames, etc. + + # look for ipv6 literals + if host[0] == '[': + if host[-1] != ']': + raise ValueError("Mismatched [...] in server name '%s'" % ( + server_name, + )) + return host, port + + # otherwise it should only be alphanumerics. + if not VALID_HOST_REGEX.match(host): + raise ValueError("Server name '%s' contains invalid characters" % ( + server_name, + )) + + return host, port + + def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, timeout=None): """Construct an endpoint for the given matrix destination. @@ -50,9 +117,7 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, timeout (int): connection timeout in seconds """ - domain_port = destination.split(":") - domain = domain_port[0] - port = int(domain_port[1]) if domain_port[1:] else None + domain, port = parse_server_name(destination) endpoint_kw_args = {} @@ -74,21 +139,22 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, reactor, "matrix", domain, protocol="tcp", default_port=default_port, endpoint=transport_endpoint, endpoint_kw_args=endpoint_kw_args - )) + ), reactor) else: return _WrappingEndpointFac(transport_endpoint( reactor, domain, port, **endpoint_kw_args - )) + ), reactor) class _WrappingEndpointFac(object): - def __init__(self, endpoint_fac): + def __init__(self, endpoint_fac, reactor): self.endpoint_fac = endpoint_fac + self.reactor = reactor @defer.inlineCallbacks def connect(self, protocolFactory): conn = yield self.endpoint_fac.connect(protocolFactory) - conn = _WrappedConnection(conn) + conn = _WrappedConnection(conn, self.reactor) defer.returnValue(conn) @@ -98,9 +164,10 @@ class _WrappedConnection(object): """ __slots__ = ["conn", "last_request"] - def __init__(self, conn): + def __init__(self, conn, reactor): object.__setattr__(self, "conn", conn) object.__setattr__(self, "last_request", time.time()) + self._reactor = reactor def __getattr__(self, name): return getattr(self.conn, name) @@ -131,14 +198,14 @@ class _WrappedConnection(object): # Time this connection out if we haven't send a request in the last # N minutes # TODO: Cancel the previous callLater? - reactor.callLater(3 * 60, self._time_things_out_maybe) + self._reactor.callLater(3 * 60, self._time_things_out_maybe) d = self.conn.request(request) def update_request_time(res): self.last_request = time.time() # TODO: Cancel the previous callLater? - reactor.callLater(3 * 60, self._time_things_out_maybe) + self._reactor.callLater(3 * 60, self._time_things_out_maybe) return res d.addCallback(update_request_time) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 821aed362b..2cb9e3e231 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -22,12 +22,12 @@ from twisted.web._newclient import ResponseDone from synapse.http import cancelled_to_request_timed_out_error from synapse.http.endpoint import matrix_federation_endpoint import synapse.metrics -from synapse.util.async import sleep, add_timeout_to_deferred +from synapse.util.async import add_timeout_to_deferred from synapse.util import logcontext from synapse.util.logcontext import make_deferred_yieldable import synapse.util.retryutils -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json from synapse.api.errors import ( SynapseError, Codes, HttpResponseException, FederationDeniedError, @@ -36,7 +36,6 @@ from synapse.api.errors import ( from signedjson.sign import sign_json import cgi -import simplejson as json import logging import random import sys @@ -193,6 +192,7 @@ class MatrixFederationHttpClient(object): add_timeout_to_deferred( request_deferred, timeout / 1000. if timeout else 60, + self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable( @@ -234,7 +234,7 @@ class MatrixFederationHttpClient(object): delay = min(delay, 2) delay *= random.uniform(0.8, 1.4) - yield sleep(delay) + yield self.clock.sleep(delay) retries_left -= 1 else: raise @@ -260,14 +260,35 @@ class MatrixFederationHttpClient(object): defer.returnValue(response) def sign_request(self, destination, method, url_bytes, headers_dict, - content=None): + content=None, destination_is=None): + """ + Signs a request by adding an Authorization header to headers_dict + Args: + destination (bytes|None): The desination home server of the request. + May be None if the destination is an identity server, in which case + destination_is must be non-None. + method (bytes): The HTTP method of the request + url_bytes (bytes): The URI path of the request + headers_dict (dict): Dictionary of request headers to append to + content (bytes): The body of the request + destination_is (bytes): As 'destination', but if the destination is an + identity server + + Returns: + None + """ request = { "method": method, "uri": url_bytes, "origin": self.server_name, - "destination": destination, } + if destination is not None: + request["destination"] = destination + + if destination_is is not None: + request["destination_is"] = destination_is + if content is not None: request["content"] = content diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index dc06f6c443..1b711ca2de 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -117,13 +117,17 @@ def _get_in_flight_counts(): Returns: dict[tuple[str, str], int] """ - for rm in _in_flight_requests: + # Cast to a list to prevent it changing while the Prometheus + # thread is collecting metrics + reqs = list(_in_flight_requests) + + for rm in reqs: rm.update_metrics() # Map from (method, name) -> int, the number of in flight requests of that # type counts = {} - for rm in _in_flight_requests: + for rm in reqs: key = (rm.method, rm.name,) counts[key] = counts.get(key, 0) + 1 @@ -131,7 +135,7 @@ def _get_in_flight_counts(): LaterGauge( - "synapse_http_request_metrics_in_flight_requests_count", + "synapse_http_server_in_flight_requests_count", "", ["method", "servlet"], _get_in_flight_counts, diff --git a/synapse/http/server.py b/synapse/http/server.py index bc09b8b2be..517aaf7b5a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -29,7 +29,7 @@ import synapse.metrics import synapse.events from canonicaljson import ( - encode_canonical_json, encode_pretty_printed_json + encode_canonical_json, encode_pretty_printed_json, json ) from twisted.internet import defer @@ -41,7 +41,6 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import simplejson logger = logging.getLogger(__name__) @@ -410,7 +409,7 @@ def respond_with_json(request, code, json_object, send_cors=False, if canonical_json or synapse.events.USE_FROZEN_DICTS: json_bytes = encode_canonical_json(json_object) else: - json_bytes = simplejson.dumps(json_object) + json_bytes = json.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index ef8e62901b..ef3a01ddc7 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -18,7 +18,9 @@ from synapse.api.errors import SynapseError, Codes import logging -import simplejson + +from canonicaljson import json + logger = logging.getLogger(__name__) @@ -171,7 +173,7 @@ def parse_json_value_from_request(request, allow_empty_body=False): return None try: - content = simplejson.loads(content_bytes) + content = json.loads(content_bytes) except Exception as e: logger.warn("Unable to parse JSON: %s", e) raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) diff --git a/synapse/http/site.py b/synapse/http/site.py index 60299657b9..fe93643b1e 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,18 +14,16 @@ import contextlib import logging -import re import time from twisted.web.server import Site, Request +from synapse.http import redact_uri from synapse.http.request_metrics import RequestMetrics from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) -ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') - _next_request_seq = 0 @@ -69,10 +67,7 @@ class SynapseRequest(Request): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): - return ACCESS_TOKEN_RE.sub( - br'\1<redacted>\3', - self.uri - ) + return redact_uri(self.uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] @@ -104,19 +99,36 @@ class SynapseRequest(Request): db_txn_count = context.db_txn_count db_txn_duration_sec = context.db_txn_duration_sec db_sched_duration_sec = context.db_sched_duration_sec + evt_db_fetch_count = context.evt_db_fetch_count except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_sec = (0, 0) + evt_db_fetch_count = 0 end_time = time.time() + # need to decode as it could be raw utf-8 bytes + # from a IDN servname in an auth header + authenticated_entity = self.authenticated_entity + if authenticated_entity is not None: + authenticated_entity = authenticated_entity.decode("utf-8", "replace") + + # ...or could be raw utf-8 bytes in the User-Agent header. + # N.B. if you don't do this, the logger explodes cryptically + # with maximum recursion trying to log errors about + # the charset problem. + # c.f. https://github.com/matrix-org/synapse/issues/3471 + user_agent = self.get_user_agent() + if user_agent is not None: + user_agent = user_agent.decode("utf-8", "replace") + self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" - " %sB %s \"%s %s %s\" \"%s\"", + " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, - self.authenticated_entity, + authenticated_entity, end_time - self.start_time, ru_utime, ru_stime, @@ -128,7 +140,8 @@ class SynapseRequest(Request): self.method, self.get_redacted_uri(), self.clientproto, - self.get_user_agent(), + user_agent, + evt_db_fetch_count, ) try: diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 429e79c472..2d2397caae 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -62,7 +62,7 @@ class LaterGauge(object): calls = self.caller() except Exception: logger.exception( - "Exception running callback for LaterGuage(%s)", + "Exception running callback for LaterGauge(%s)", self.name, ) yield g @@ -140,14 +140,15 @@ gc_time = Histogram( class GCCounts(object): def collect(self): - cm = GaugeMetricFamily("python_gc_counts", "GC cycle counts", labels=["gen"]) + cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): cm.add_metric([str(n)], m) yield cm -REGISTRY.register(GCCounts()) +if not running_on_pypy: + REGISTRY.register(GCCounts()) # # Twisted reactor metrics @@ -190,6 +191,22 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name" # finished being processed. event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) +last_ticked = time.time() + + +class ReactorLastSeenMetric(object): + + def collect(self): + cm = GaugeMetricFamily( + "python_twisted_reactor_last_seen", + "Seconds since the Twisted reactor was last seen", + ) + cm.add_metric([], time.time() - last_ticked) + yield cm + + +REGISTRY.register(ReactorLastSeenMetric()) + def runUntilCurrentTimer(func): @@ -222,6 +239,11 @@ def runUntilCurrentTimer(func): tick_time.observe(end - start) pending_calls_metric.observe(num_pending) + # Update the time we last ticked, for the metric to test whether + # Synapse's reactor has frozen + global last_ticked + last_ticked = end + if running_on_pypy: return ret diff --git a/synapse/notifier.py b/synapse/notifier.py index 6dce20a284..3c0622a294 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -161,6 +161,7 @@ class Notifier(object): self.user_to_user_stream = {} self.room_to_user_streams = {} + self.hs = hs self.event_sources = hs.get_event_sources() self.store = hs.get_datastore() self.pending_new_room_events = [] @@ -340,6 +341,7 @@ class Notifier(object): add_timeout_to_deferred( listener.deferred, (end_time - now) / 1000., + self.hs.get_reactor(), ) with PreserveLoggingContext(): yield listener.deferred @@ -561,6 +563,7 @@ class Notifier(object): add_timeout_to_deferred( listener.deferred.addTimeout, (end_time - now) / 1000., + self.hs.get_reactor(), ) try: with PreserveLoggingContext(): diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index ba7286cb72..52d4f087ee 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled import logging @@ -199,7 +199,7 @@ class EmailPusher(object): self.timed_call = None if soonest_due_at is not None: - self.timed_call = reactor.callLater( + self.timed_call = self.hs.get_reactor().callLater( self.seconds_until(soonest_due_at), self.on_timer ) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index bf7ff74a1a..7a481b5a1e 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled from . import push_rule_evaluator @@ -220,7 +220,9 @@ class HttpPusher(object): ) else: logger.info("Push failed: delaying for %ds", self.backoff_delay) - self.timed_call = reactor.callLater(self.backoff_delay, self.on_timer) + self.timed_call = self.hs.get_reactor().callLater( + self.backoff_delay, self.on_timer + ) self.backoff_delay = min(self.backoff_delay * 2, self.MAX_BACKOFF_SEC) break diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 750d11ca38..36bb5bbc65 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -19,7 +19,6 @@ import logging from twisted.internet import defer from synapse.push.pusher import PusherFactory -from synapse.util.async import run_on_reactor from synapse.util.logcontext import make_deferred_yieldable, run_in_background logger = logging.getLogger(__name__) @@ -125,7 +124,6 @@ class PusherPool: @defer.inlineCallbacks def on_new_notifications(self, min_stream_id, max_stream_id): - yield run_on_reactor() try: users_affected = yield self.store.get_push_action_users_in_range( min_stream_id, max_stream_id @@ -151,7 +149,6 @@ class PusherPool: @defer.inlineCallbacks def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids): - yield run_on_reactor() try: # Need to subtract 1 from the minimum because the lower bound here # is not inclusive diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 478c497722..987eec3ef2 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -50,21 +50,21 @@ REQUIREMENTS = { "bcrypt": ["bcrypt>=3.1.0"], "pillow": ["PIL"], "pydenticon": ["pydenticon"], - "blist": ["blist"], + "sortedcontainers": ["sortedcontainers"], "pysaml2>=3.0.0": ["saml2>=3.0.0"], "pymacaroons-pynacl": ["pymacaroons"], "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], "six": ["six"], "prometheus_client": ["prometheus_client"], + "attrs": ["attr"], + "netaddr>=0.7.18": ["netaddr"], } + CONDITIONAL_REQUIREMENTS = { "web_client": { "matrix_angular_sdk>=0.6.8": ["syweb>=0.6.8"], }, - "preview_url": { - "netaddr>=0.7.18": ["netaddr"], - }, "email.enable_notifs": { "Jinja2>=2.8": ["Jinja2>=2.8"], "bleach>=1.4.2": ["bleach>=1.4.2"], diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index a9baa2c1c3..f080f96cc1 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -21,7 +21,6 @@ from synapse.api.errors import ( from synapse.events import FrozenEvent from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request -from synapse.util.async import sleep from synapse.util.caches.response_cache import ResponseCache from synapse.util.metrics import Measure from synapse.types import Requester, UserID @@ -33,11 +32,12 @@ logger = logging.getLogger(__name__) @defer.inlineCallbacks -def send_event_to_master(client, host, port, requester, event, context, +def send_event_to_master(clock, client, host, port, requester, event, context, ratelimit, extra_users): """Send event to be handled on the master Args: + clock (synapse.util.Clock) client (SimpleHttpClient) host (str): host of master port (int): port on master listening for HTTP replication @@ -77,7 +77,7 @@ def send_event_to_master(client, host, port, requester, event, context, # If we timed out we probably don't need to worry about backing # off too much, but lets just wait a little anyway. - yield sleep(1) + yield clock.sleep(1) except MatrixCodeMessageException as e: # We convert to SynapseError as we know that it was a SynapseError # on the master process that we should send to the client. (And diff --git a/synapse/replication/slave/storage/events.py b/synapse/replication/slave/storage/events.py index b1f64ef0d8..97d3196633 100644 --- a/synapse/replication/slave/storage/events.py +++ b/synapse/replication/slave/storage/events.py @@ -23,6 +23,7 @@ from synapse.storage.roommember import RoomMemberWorkerStore from synapse.storage.state import StateGroupWorkerStore from synapse.storage.stream import StreamWorkerStore from synapse.storage.signatures import SignatureWorkerStore +from synapse.storage.user_erasure_store import UserErasureWorkerStore from ._base import BaseSlavedStore from ._slaved_id_tracker import SlavedIdTracker @@ -45,6 +46,7 @@ class SlavedEventStore(EventFederationWorkerStore, EventsWorkerStore, StateGroupWorkerStore, SignatureWorkerStore, + UserErasureWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 6d2513c4e2..bb852b00af 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -15,7 +15,7 @@ """A replication client for use by synapse workers. """ -from twisted.internet import reactor, defer +from twisted.internet import defer from twisted.internet.protocol import ReconnectingClientFactory from .commands import ( @@ -44,7 +44,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): self.server_name = hs.config.server_name self._clock = hs.get_clock() # As self.clock is defined in super class - reactor.addSystemEventTrigger("before", "shutdown", self.stopTrying) + hs.get_reactor().addSystemEventTrigger("before", "shutdown", self.stopTrying) def startedConnecting(self, connector): logger.info("Connecting to replication: %r", connector.getDestination()) @@ -95,7 +95,7 @@ class ReplicationClientHandler(object): factory = ReplicationClientFactory(hs, client_name, self) host = hs.config.worker_replication_host port = hs.config.worker_replication_port - reactor.connectTCP(host, port, factory) + hs.get_reactor().connectTCP(host, port, factory) def on_rdata(self, stream_name, token, rows): """Called when we get new replication data. By default this just pokes diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 12aac3cc6b..f3908df642 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -19,13 +19,17 @@ allowed to be sent by which side. """ import logging -import simplejson +import platform +if platform.python_implementation() == "PyPy": + import json + _json_encoder = json.JSONEncoder() +else: + import simplejson as json + _json_encoder = json.JSONEncoder(namedtuple_as_object=False) logger = logging.getLogger(__name__) -_json_encoder = simplejson.JSONEncoder(namedtuple_as_object=False) - class Command(object): """The base command class. @@ -102,7 +106,7 @@ class RdataCommand(Command): return cls( stream_name, None if token == "batch" else int(token), - simplejson.loads(row_json) + json.loads(row_json) ) def to_line(self): @@ -300,7 +304,7 @@ class InvalidateCacheCommand(Command): def from_line(cls, line): cache_func, keys_json = line.split(" ", 1) - return cls(cache_func, simplejson.loads(keys_json)) + return cls(cache_func, json.loads(keys_json)) def to_line(self): return " ".join(( @@ -329,7 +333,7 @@ class UserIpCommand(Command): def from_line(cls, line): user_id, jsn = line.split(" ", 1) - access_token, ip, user_agent, device_id, last_seen = simplejson.loads(jsn) + access_token, ip, user_agent, device_id, last_seen = json.loads(jsn) return cls( user_id, access_token, ip, user_agent, device_id, last_seen diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index c870475cd1..171a698e14 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -564,11 +564,13 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # The following simply registers metrics for the replication connections pending_commands = LaterGauge( - "pending_commands", "", ["name", "conn_id"], + "synapse_replication_tcp_protocol_pending_commands", + "", + ["name", "conn_id"], lambda: { - (p.name, p.conn_id): len(p.pending_commands) - for p in connected_connections - }) + (p.name, p.conn_id): len(p.pending_commands) for p in connected_connections + }, +) def transport_buffer_size(protocol): @@ -579,11 +581,13 @@ def transport_buffer_size(protocol): transport_send_buffer = LaterGauge( - "synapse_replication_tcp_transport_send_buffer", "", ["name", "conn_id"], + "synapse_replication_tcp_protocol_transport_send_buffer", + "", + ["name", "conn_id"], lambda: { - (p.name, p.conn_id): transport_buffer_size(p) - for p in connected_connections - }) + (p.name, p.conn_id): transport_buffer_size(p) for p in connected_connections + }, +) def transport_kernel_read_buffer_size(protocol, read=True): @@ -602,37 +606,50 @@ def transport_kernel_read_buffer_size(protocol, read=True): tcp_transport_kernel_send_buffer = LaterGauge( - "synapse_replication_tcp_transport_kernel_send_buffer", "", ["name", "conn_id"], + "synapse_replication_tcp_protocol_transport_kernel_send_buffer", + "", + ["name", "conn_id"], lambda: { (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False) for p in connected_connections - }) + }, +) tcp_transport_kernel_read_buffer = LaterGauge( - "synapse_replication_tcp_transport_kernel_read_buffer", "", ["name", "conn_id"], + "synapse_replication_tcp_protocol_transport_kernel_read_buffer", + "", + ["name", "conn_id"], lambda: { (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True) for p in connected_connections - }) + }, +) tcp_inbound_commands = LaterGauge( - "synapse_replication_tcp_inbound_commands", "", ["command", "name", "conn_id"], + "synapse_replication_tcp_protocol_inbound_commands", + "", + ["command", "name", "conn_id"], lambda: { (k[0], p.name, p.conn_id): count for p in connected_connections for k, count in iteritems(p.inbound_commands_counter) - }) + }, +) tcp_outbound_commands = LaterGauge( - "synapse_replication_tcp_outbound_commands", "", ["command", "name", "conn_id"], + "synapse_replication_tcp_protocol_outbound_commands", + "", + ["command", "name", "conn_id"], lambda: { (k[0], p.name, p.conn_id): count for p in connected_connections for k, count in iteritems(p.outbound_commands_counter) - }) + }, +) # number of updates received for each RDATA stream -inbound_rdata_count = Counter("synapse_replication_tcp_inbound_rdata_count", "", - ["stream_name"]) +inbound_rdata_count = Counter( + "synapse_replication_tcp_protocol_inbound_rdata_count", "", ["stream_name"] +) diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 63bd6d2652..95ad8c1b4c 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -15,7 +15,7 @@ """The server side of the replication stream. """ -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.protocol import Factory from .streams import STREAMS_MAP, FederationStream @@ -109,7 +109,7 @@ class ReplicationStreamer(object): self.is_looping = False self.pending_updates = False - reactor.addSystemEventTrigger("before", "shutdown", self.on_shutdown) + hs.get_reactor().addSystemEventTrigger("before", "shutdown", self.on_shutdown) def on_shutdown(self): # close all connections on shutdown diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 6835a7bba2..8fb08dc526 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -16,6 +16,8 @@ from twisted.internet import defer +from six.moves import http_client + from synapse.api.constants import Membership from synapse.api.errors import AuthError, SynapseError, Codes, NotFoundError from synapse.types import UserID, create_requester @@ -169,16 +171,12 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): yield self.store.find_first_stream_ordering_after_ts(ts) ) - room_event_after_stream_ordering = ( + r = ( yield self.store.get_room_event_after_stream_ordering( room_id, stream_ordering, ) ) - if room_event_after_stream_ordering: - token = yield self.store.get_topological_token_for_event( - room_event_after_stream_ordering, - ) - else: + if not r: logger.warn( "[purge] purging events not possible: No event found " "(received_ts %i => stream_ordering %i)", @@ -189,8 +187,10 @@ class PurgeHistoryRestServlet(ClientV1RestServlet): "there is no event to be purged", errcode=Codes.NOT_FOUND, ) + (stream, topo, _event_id) = r + token = "t%d-%d" % (topo, stream) logger.info( - "[purge] purging up to token %d (received_ts %i => " + "[purge] purging up to token %s (received_ts %i => " "stream_ordering %i)", token, ts, stream_ordering, ) @@ -249,6 +249,15 @@ class DeactivateAccountRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request, target_user_id): + body = parse_json_object_from_request(request, allow_empty_body=True) + erase = body.get("erase", False) + if not isinstance(erase, bool): + raise SynapseError( + http_client.BAD_REQUEST, + "Param 'erase' must be a boolean, if given", + Codes.BAD_JSON, + ) + UserID.from_string(target_user_id) requester = yield self.auth.get_user_by_req(request) is_admin = yield self.auth.is_server_admin(requester.user) @@ -256,7 +265,9 @@ class DeactivateAccountRestServlet(ClientV1RestServlet): if not is_admin: raise AuthError(403, "You are not a server admin") - yield self._deactivate_account_handler.deactivate_account(target_user_id) + yield self._deactivate_account_handler.deactivate_account( + target_user_id, erase, + ) defer.returnValue((200, {})) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 34df5be4e9..88ca5184cd 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -23,7 +23,8 @@ from synapse.util.msisdn import phone_number_to_msisdn from .base import ClientV1RestServlet, client_path_patterns -import simplejson as json +from canonicaljson import json + import urllib from six.moves.urllib import parse as urlparse diff --git a/synapse/rest/client/v1/register.py b/synapse/rest/client/v1/register.py index 9b3022e0b0..c10320dedf 100644 --- a/synapse/rest/client/v1/register.py +++ b/synapse/rest/client/v1/register.py @@ -24,8 +24,6 @@ import synapse.util.stringutils as stringutils from synapse.http.servlet import parse_json_object_from_request from synapse.types import create_requester -from synapse.util.async import run_on_reactor - from hashlib import sha1 import hmac import logging @@ -272,7 +270,6 @@ class RegisterRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def _do_password(self, request, register_json, session): - yield run_on_reactor() if (self.hs.config.enable_registration_captcha and not session[LoginType.RECAPTCHA]): # captcha should've been done by this stage! @@ -333,8 +330,6 @@ class RegisterRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def _do_shared_secret(self, request, register_json, session): - yield run_on_reactor() - if not isinstance(register_json.get("mac", None), string_types): raise SynapseError(400, "Expected mac.") if not isinstance(register_json.get("user", None), string_types): @@ -423,8 +418,6 @@ class CreateUserRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def _do_create(self, requester, user_json): - yield run_on_reactor() - if "localpart" not in user_json: raise SynapseError(400, "Expected 'localpart' key.") diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 0b984987ed..e6ae5db79b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -31,7 +31,7 @@ from synapse.http.servlet import ( from six.moves.urllib import parse as urlparse import logging -import simplejson as json +from canonicaljson import json logger = logging.getLogger(__name__) diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 063d4f82f8..f6ef4ccfec 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd # Copyright 2017 Vector Creations Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +16,7 @@ # limitations under the License. import logging +from six.moves import http_client from twisted.internet import defer from synapse.api.auth import has_access_token @@ -24,7 +26,6 @@ from synapse.http.servlet import ( RestServlet, assert_params_in_request, parse_json_object_from_request, ) -from synapse.util.async import run_on_reactor from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.threepids import check_3pid_allowed from ._base import client_v2_patterns, interactive_auth_handler @@ -187,13 +188,20 @@ class DeactivateAccountRestServlet(RestServlet): @defer.inlineCallbacks def on_POST(self, request): body = parse_json_object_from_request(request) + erase = body.get("erase", False) + if not isinstance(erase, bool): + raise SynapseError( + http_client.BAD_REQUEST, + "Param 'erase' must be a boolean, if given", + Codes.BAD_JSON, + ) requester = yield self.auth.get_user_by_req(request) # allow ASes to dectivate their own users if requester.app_service: yield self._deactivate_account_handler.deactivate_account( - requester.user.to_string() + requester.user.to_string(), erase, ) defer.returnValue((200, {})) @@ -201,7 +209,7 @@ class DeactivateAccountRestServlet(RestServlet): requester, body, self.hs.get_ip_from_request(request), ) yield self._deactivate_account_handler.deactivate_account( - requester.user.to_string(), + requester.user.to_string(), erase, ) defer.returnValue((200, {})) @@ -300,8 +308,6 @@ class ThreepidRestServlet(RestServlet): @defer.inlineCallbacks def on_GET(self, request): - yield run_on_reactor() - requester = yield self.auth.get_user_by_req(request) threepids = yield self.datastore.user_get_threepids( @@ -388,9 +394,16 @@ class ThreepidDeleteRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request) user_id = requester.user.to_string() - yield self.auth_handler.delete_threepid( - user_id, body['medium'], body['address'] - ) + try: + yield self.auth_handler.delete_threepid( + user_id, body['medium'], body['address'] + ) + except Exception: + # NB. This endpoint should succeed if there is nothing to + # delete, so it should only throw if something is wrong + # that we ought to care about. + logger.exception("Failed to remove threepid") + raise SynapseError(500, "Failed to remove threepid") defer.returnValue((200, {})) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 97e4891f3f..5430183b9a 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -34,7 +34,6 @@ import hmac import re from string import capwords from hashlib import sha1 -from synapse.util.async import run_on_reactor from synapse.util.ratelimitutils import FederationRateLimiter from six import string_types @@ -193,8 +192,6 @@ class RegisterRestServlet(RestServlet): @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): - yield run_on_reactor() - body = parse_json_object_from_request(request) kind = "user" diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a291cffbf1..d2aa47b326 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import simplejson as json +from canonicaljson import json logger = logging.getLogger(__name__) diff --git a/synapse/rest/media/v0/content_repository.py b/synapse/rest/media/v0/content_repository.py index 956bd5da75..e44d4276d2 100644 --- a/synapse/rest/media/v0/content_repository.py +++ b/synapse/rest/media/v0/content_repository.py @@ -22,8 +22,9 @@ from synapse.api.errors import ( from twisted.protocols.basic import FileSender from twisted.web import server, resource +from canonicaljson import json + import base64 -import simplejson as json import logging import os import re diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 2ac767d2dc..218ba7a083 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -58,6 +58,7 @@ UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 class MediaRepository(object): def __init__(self, hs): + self.hs = hs self.auth = hs.get_auth() self.client = MatrixFederationHttpClient(hs) self.clock = hs.get_clock() @@ -94,7 +95,7 @@ class MediaRepository(object): storage_providers.append(provider) self.media_storage = MediaStorage( - self.primary_base_path, self.filepaths, storage_providers, + self.hs, self.primary_base_path, self.filepaths, storage_providers, ) self.clock.looping_call( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index d23fe10b07..d6b8ebbedb 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -37,13 +37,15 @@ class MediaStorage(object): """Responsible for storing/fetching files from local sources. Args: + hs (synapse.server.Homeserver) local_media_directory (str): Base path where we store media on disk filepaths (MediaFilePaths) storage_providers ([StorageProvider]): List of StorageProvider that are used to fetch and store files. """ - def __init__(self, local_media_directory, filepaths, storage_providers): + def __init__(self, hs, local_media_directory, filepaths, storage_providers): + self.hs = hs self.local_media_directory = local_media_directory self.filepaths = filepaths self.storage_providers = storage_providers @@ -175,7 +177,8 @@ class MediaStorage(object): res = yield provider.fetch(path, file_info) if res: with res: - consumer = BackgroundFileConsumer(open(local_path, "w")) + consumer = BackgroundFileConsumer( + open(local_path, "w"), self.hs.get_reactor()) yield res.write_to_consumer(consumer) yield consumer.wait() defer.returnValue(local_path) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 565cef2b8d..adca490640 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -23,7 +23,8 @@ import re import shutil import sys import traceback -import simplejson as json + +from canonicaljson import json from six.moves import urllib_parse as urlparse from six import string_types diff --git a/synapse/server.py b/synapse/server.py index 58dbf78437..c29c19289a 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -40,7 +40,7 @@ from synapse.federation.transport.client import TransportLayerClient from synapse.federation.transaction_queue import TransactionQueue from synapse.handlers import Handlers from synapse.handlers.appservice import ApplicationServicesHandler -from synapse.handlers.auth import AuthHandler, MacaroonGeneartor +from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.device import DeviceHandler @@ -165,15 +165,19 @@ class HomeServer(object): 'server_notices_sender', ] - def __init__(self, hostname, **kwargs): + def __init__(self, hostname, reactor=None, **kwargs): """ Args: hostname : The hostname for the server. """ + if not reactor: + from twisted.internet import reactor + + self._reactor = reactor self.hostname = hostname self._building = {} - self.clock = Clock() + self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() @@ -186,6 +190,12 @@ class HomeServer(object): self.datastore = DataStore(self.get_db_conn(), self) logger.info("Finished setting up.") + def get_reactor(self): + """ + Fetch the Twisted reactor in use by this HomeServer. + """ + return self._reactor + def get_ip_from_request(self, request): # X-Forwarded-For is handled by our custom request type. return request.getClientIP() @@ -261,7 +271,7 @@ class HomeServer(object): return AuthHandler(self) def build_macaroon_generator(self): - return MacaroonGeneartor(self) + return MacaroonGenerator(self) def build_device_handler(self): return DeviceHandler(self) @@ -328,6 +338,7 @@ class HomeServer(object): return adbapi.ConnectionPool( name, + cp_reactor=self.get_reactor(), **self.db_config.get("args", {}) ) diff --git a/synapse/state.py b/synapse/state.py index 216418f58d..8098db94b4 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -694,10 +694,10 @@ def _create_auth_events_from_maps(unconflicted_state, conflicted_state, state_ma return auth_events -def _resolve_with_state(unconflicted_state_ids, conflicted_state_ds, auth_event_ids, +def _resolve_with_state(unconflicted_state_ids, conflicted_state_ids, auth_event_ids, state_map): conflicted_state = {} - for key, event_ids in iteritems(conflicted_state_ds): + for key, event_ids in iteritems(conflicted_state_ids): events = [state_map[ev_id] for ev_id in event_ids if ev_id in state_map] if len(events) > 1: conflicted_state[key] = events diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index 979fa22438..e843b702b9 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -20,6 +20,7 @@ import time import logging from synapse.storage.devices import DeviceStore +from synapse.storage.user_erasure_store import UserErasureStore from .appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore ) @@ -88,6 +89,7 @@ class DataStore(RoomMemberStore, RoomStore, DeviceInboxStore, UserDirectoryStore, GroupServerStore, + UserErasureStore, ): def __init__(self, db_conn, hs): diff --git a/synapse/storage/account_data.py b/synapse/storage/account_data.py index f83ff0454a..7034a61399 100644 --- a/synapse/storage/account_data.py +++ b/synapse/storage/account_data.py @@ -20,10 +20,11 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.util.id_generators import StreamIdGenerator from synapse.util.caches.stream_change_cache import StreamChangeCache -from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached, cachedInlineCallbacks + +from canonicaljson import json import abc -import simplejson as json import logging logger = logging.getLogger(__name__) @@ -114,25 +115,6 @@ class AccountDataWorkerStore(SQLBaseStore): else: defer.returnValue(None) - @cachedList(cached_method_name="get_global_account_data_by_type_for_user", - num_args=2, list_name="user_ids", inlineCallbacks=True) - def get_global_account_data_by_type_for_users(self, data_type, user_ids): - rows = yield self._simple_select_many_batch( - table="account_data", - column="user_id", - iterable=user_ids, - keyvalues={ - "account_data_type": data_type, - }, - retcols=("user_id", "content",), - desc="get_global_account_data_by_type_for_users", - ) - - defer.returnValue({ - row["user_id"]: json.loads(row["content"]) if row["content"] else None - for row in rows - }) - @cached(num_args=2) def get_account_data_for_room(self, user_id, room_id): """Get all the client account_data for a user for a room. diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 12ea8a158c..4d32d0bdf6 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -15,8 +15,8 @@ # limitations under the License. import logging import re -import simplejson as json from twisted.internet import defer +from canonicaljson import json from synapse.appservice import AppServiceTransaction from synapse.config.appservice import load_appservices diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 8af325a9f5..af18964510 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -12,14 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import synapse.util.async from ._base import SQLBaseStore from . import engines from twisted.internet import defer -import simplejson as json +from canonicaljson import json + import logging logger = logging.getLogger(__name__) @@ -92,7 +92,7 @@ class BackgroundUpdateStore(SQLBaseStore): logger.info("Starting background schema updates") while True: - yield synapse.util.async.sleep( + yield self.hs.get_clock().sleep( self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.) try: diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index ce338514e8..968d2fed22 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -15,7 +15,7 @@ import logging -from twisted.internet import defer, reactor +from twisted.internet import defer from ._base import Cache from . import background_updates @@ -70,7 +70,9 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): self._client_ip_looper = self._clock.looping_call( self._update_client_ips_batch, 5 * 1000 ) - reactor.addSystemEventTrigger("before", "shutdown", self._update_client_ips_batch) + self.hs.get_reactor().addSystemEventTrigger( + "before", "shutdown", self._update_client_ips_batch + ) def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id, now=None): diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index a879e5bfc1..38addbf9c0 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -14,7 +14,8 @@ # limitations under the License. import logging -import simplejson + +from canonicaljson import json from twisted.internet import defer @@ -85,7 +86,7 @@ class DeviceInboxStore(BackgroundUpdateStore): ) rows = [] for destination, edu in remote_messages_by_destination.items(): - edu_json = simplejson.dumps(edu) + edu_json = json.dumps(edu) rows.append((destination, stream_id, now_ms, edu_json)) txn.executemany(sql, rows) @@ -177,7 +178,7 @@ class DeviceInboxStore(BackgroundUpdateStore): " WHERE user_id = ?" ) txn.execute(sql, (user_id,)) - message_json = simplejson.dumps(messages_by_device["*"]) + message_json = json.dumps(messages_by_device["*"]) for row in txn: # Add the message for all devices for this user on this # server. @@ -199,7 +200,7 @@ class DeviceInboxStore(BackgroundUpdateStore): # Only insert into the local inbox if the device exists on # this server device = row[0] - message_json = simplejson.dumps(messages_by_device[device]) + message_json = json.dumps(messages_by_device[device]) messages_json_for_user[device] = message_json if messages_json_for_user: @@ -253,7 +254,7 @@ class DeviceInboxStore(BackgroundUpdateStore): messages = [] for row in txn: stream_pos = row[0] - messages.append(simplejson.loads(row[1])) + messages.append(json.loads(row[1])) if len(messages) < limit: stream_pos = current_stream_id return (messages, stream_pos) @@ -389,7 +390,7 @@ class DeviceInboxStore(BackgroundUpdateStore): messages = [] for row in txn: stream_pos = row[0] - messages.append(simplejson.loads(row[1])) + messages.append(json.loads(row[1])) if len(messages) < limit: stream_pos = current_stream_id return (messages, stream_pos) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index d149d8392e..2ed9ada783 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import simplejson as json from twisted.internet import defer @@ -21,6 +20,8 @@ from synapse.api.errors import StoreError from ._base import SQLBaseStore, Cache from synapse.util.caches.descriptors import cached, cachedList, cachedInlineCallbacks +from canonicaljson import json + from six import itervalues, iteritems logger = logging.getLogger(__name__) diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index b146487943..181047c8b7 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -16,8 +16,7 @@ from twisted.internet import defer from synapse.util.caches.descriptors import cached -from canonicaljson import encode_canonical_json -import simplejson as json +from canonicaljson import encode_canonical_json, json from ._base import SQLBaseStore diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index d0350ee5fe..05cb3f61ce 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -16,11 +16,11 @@ from synapse.storage._base import SQLBaseStore, LoggingTransaction from twisted.internet import defer -from synapse.util.async import sleep from synapse.util.caches.descriptors import cachedInlineCallbacks import logging -import simplejson as json + +from canonicaljson import json from six import iteritems @@ -84,6 +84,8 @@ class EventPushActionsWorkerStore(SQLBaseStore): self.find_stream_orderings_looping_call = self._clock.looping_call( self._find_stream_orderings_for_times, 10 * 60 * 1000 ) + self._rotate_delay = 3 + self._rotate_count = 10000 @cachedInlineCallbacks(num_args=3, tree=True, max_entries=5000) def get_unread_event_push_actions_by_room_for_user( @@ -800,7 +802,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): ) if caught_up: break - yield sleep(5) + yield self.hs.get_clock().sleep(self._rotate_delay) finally: self._doing_notif_rotation = False @@ -821,8 +823,8 @@ class EventPushActionsStore(EventPushActionsWorkerStore): txn.execute(""" SELECT stream_ordering FROM event_push_actions WHERE stream_ordering > ? - ORDER BY stream_ordering ASC LIMIT 1 OFFSET 50000 - """, (old_rotate_stream_ordering,)) + ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? + """, (old_rotate_stream_ordering, self._rotate_count)) stream_row = txn.fetchone() if stream_row: offset_stream_ordering, = stream_row diff --git a/synapse/storage/events.py b/synapse/storage/events.py index cb1082e864..a54abb9edd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -19,7 +19,8 @@ from functools import wraps import itertools import logging -import simplejson as json +from canonicaljson import json + from twisted.internet import defer from synapse.storage.events_worker import EventsWorkerStore @@ -800,7 +801,8 @@ class EventsStore(EventsWorkerStore): ] ) - self._curr_state_delta_stream_cache.entity_has_changed( + txn.call_after( + self._curr_state_delta_stream_cache.entity_has_changed, room_id, max_stream_order, ) @@ -1044,7 +1046,6 @@ class EventsStore(EventsWorkerStore): "event_edge_hashes", "event_edges", "event_forward_extremities", - "event_push_actions", "event_reference_hashes", "event_search", "event_signatures", @@ -1064,6 +1065,14 @@ class EventsStore(EventsWorkerStore): [(ev.event_id,) for ev, _ in events_and_contexts] ) + for table in ( + "event_push_actions", + ): + txn.executemany( + "DELETE FROM %s WHERE room_id = ? AND event_id = ?" % (table,), + [(ev.event_id,) for ev, _ in events_and_contexts] + ) + def _store_event_txn(self, txn, events_and_contexts): """Insert new events into the event and event_json tables diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 32d9d00ffb..896225aab9 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -14,13 +14,14 @@ # limitations under the License. from ._base import SQLBaseStore -from twisted.internet import defer, reactor +from twisted.internet import defer from synapse.events import FrozenEvent from synapse.events.utils import prune_event from synapse.util.logcontext import ( PreserveLoggingContext, make_deferred_yieldable, run_in_background, + LoggingContext, ) from synapse.util.metrics import Measure from synapse.api.errors import SynapseError @@ -28,7 +29,8 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import simplejson as json + +from canonicaljson import json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 @@ -145,6 +147,9 @@ class EventsWorkerStore(SQLBaseStore): missing_events_ids = [e for e in event_ids if e not in event_entry_map] if missing_events_ids: + log_ctx = LoggingContext.current_context() + log_ctx.record_event_fetch(len(missing_events_ids)) + missing_events = yield self._enqueue_events( missing_events_ids, check_redacted=check_redacted, @@ -265,7 +270,7 @@ class EventsWorkerStore(SQLBaseStore): except Exception: logger.exception("Failed to callback") with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list, row_dict) + self.hs.get_reactor().callFromThread(fire, event_list, row_dict) except Exception as e: logger.exception("do_fetch") @@ -278,7 +283,7 @@ class EventsWorkerStore(SQLBaseStore): if event_list: with PreserveLoggingContext(): - reactor.callFromThread(fire, event_list) + self.hs.get_reactor().callFromThread(fire, event_list) @defer.inlineCallbacks def _enqueue_events(self, events, check_redacted=True, allow_rejected=False): diff --git a/synapse/storage/filtering.py b/synapse/storage/filtering.py index 2e2763126d..eae6027cee 100644 --- a/synapse/storage/filtering.py +++ b/synapse/storage/filtering.py @@ -19,8 +19,7 @@ from ._base import SQLBaseStore from synapse.api.errors import SynapseError, Codes from synapse.util.caches.descriptors import cachedInlineCallbacks -from canonicaljson import encode_canonical_json -import simplejson as json +from canonicaljson import encode_canonical_json, json class FilteringStore(SQLBaseStore): diff --git a/synapse/storage/group_server.py b/synapse/storage/group_server.py index da05ccb027..b77402d295 100644 --- a/synapse/storage/group_server.py +++ b/synapse/storage/group_server.py @@ -20,7 +20,7 @@ from synapse.api.errors import SynapseError from ._base import SQLBaseStore -import simplejson as json +from canonicaljson import json # The category ID for the "default" category. We don't store as null in the diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 04a0b59a39..9e52e992b3 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -25,9 +25,10 @@ from synapse.push.baserules import list_with_base_rules from synapse.api.constants import EventTypes from twisted.internet import defer +from canonicaljson import json + import abc import logging -import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/pusher.py b/synapse/storage/pusher.py index 307660b99a..c6def861cf 100644 --- a/synapse/storage/pusher.py +++ b/synapse/storage/pusher.py @@ -17,12 +17,11 @@ from ._base import SQLBaseStore from twisted.internet import defer -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json from synapse.util.caches.descriptors import cachedInlineCallbacks, cachedList import logging -import simplejson as json import types logger = logging.getLogger(__name__) diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index c93c228f6e..f230a3bab7 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -21,9 +21,10 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache from twisted.internet import defer +from canonicaljson import json + import abc import logging -import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index d10cb3d165..361afbbf73 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -448,15 +448,6 @@ class RegistrationStore(RegistrationWorkerStore, defer.returnValue(ret['user_id']) defer.returnValue(None) - def user_delete_threepids(self, user_id): - return self._simple_delete( - "user_threepids", - keyvalues={ - "user_id": user_id, - }, - desc="user_delete_threepids", - ) - def user_delete_threepid(self, user_id, medium, address): return self._simple_delete( "user_threepids", @@ -620,7 +611,9 @@ class RegistrationStore(RegistrationWorkerStore, Removes the given user to the table of users who need to be parted from all the rooms they're in, effectively marking that user as fully deactivated. """ - return self._simple_delete_one( + # XXX: This should be simple_delete_one but we failed to put a unique index on + # the table, so somehow duplicate entries have ended up in it. + return self._simple_delete( "users_pending_deactivation", keyvalues={ "user_id": user_id, diff --git a/synapse/storage/room.py b/synapse/storage/room.py index ea6a189185..ca0eb187e5 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -20,9 +20,10 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.search import SearchStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from canonicaljson import json + import collections import logging -import simplejson as json import re logger = logging.getLogger(__name__) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 7bfc3d91b5..8fc9549a75 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -28,7 +28,7 @@ from synapse.api.constants import Membership, EventTypes from synapse.types import get_domain_from_id import logging -import simplejson as json +from canonicaljson import json from six import itervalues, iteritems @@ -455,7 +455,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): defer.returnValue(joined_hosts) - @cached(max_entries=10000, iterable=True) + @cached(max_entries=10000) def _get_joined_hosts_cache(self, room_id): return _JoinedHostsCache(self, room_id) @@ -578,7 +578,6 @@ class RoomMemberStore(RoomMemberWorkerStore): ) txn.execute(sql, (user_id, room_id)) - txn.call_after(self.was_forgotten_at.invalidate_all) txn.call_after(self.did_forget.invalidate, (user_id, room_id)) self._invalidate_cache_and_stream( txn, self.who_forgot_in_room, (room_id,) @@ -609,31 +608,6 @@ class RoomMemberStore(RoomMemberWorkerStore): count = yield self.runInteraction("did_forget_membership", f) defer.returnValue(count == 0) - @cachedInlineCallbacks(num_args=3) - def was_forgotten_at(self, user_id, room_id, event_id): - """Returns whether user_id has elected to discard history for room_id at - event_id. - - event_id must be a membership event.""" - def f(txn): - sql = ( - "SELECT" - " forgotten" - " FROM" - " room_memberships" - " WHERE" - " user_id = ?" - " AND" - " room_id = ?" - " AND" - " event_id = ?" - ) - txn.execute(sql, (user_id, room_id, event_id)) - rows = txn.fetchall() - return rows[0][0] - forgot = yield self.runInteraction("did_forget_membership_at", f) - defer.returnValue(forgot == 1) - @defer.inlineCallbacks def _background_add_membership_profile(self, progress, batch_size): target_min_stream_id = progress.get( diff --git a/synapse/storage/schema/delta/50/erasure_store.sql b/synapse/storage/schema/delta/50/erasure_store.sql new file mode 100644 index 0000000000..5d8641a9ab --- /dev/null +++ b/synapse/storage/schema/delta/50/erasure_store.sql @@ -0,0 +1,21 @@ +/* Copyright 2018 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- a table of users who have requested that their details be erased +CREATE TABLE erased_users ( + user_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id); diff --git a/synapse/storage/search.py b/synapse/storage/search.py index f0fa5d7631..9b77c45318 100644 --- a/synapse/storage/search.py +++ b/synapse/storage/search.py @@ -16,7 +16,7 @@ from collections import namedtuple import logging import re -import simplejson as json +from canonicaljson import json from six import string_types diff --git a/synapse/storage/state.py b/synapse/storage/state.py index bdee14a8eb..cd9821c270 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -23,7 +23,7 @@ from twisted.internet import defer from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine -from synapse.util.caches import intern_string, CACHE_SIZE_FACTOR +from synapse.util.caches import intern_string, get_cache_factor_for from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.stringutils import to_ascii @@ -57,7 +57,7 @@ class StateGroupWorkerStore(SQLBaseStore): super(StateGroupWorkerStore, self).__init__(db_conn, hs) self._state_group_cache = DictionaryCache( - "*stateGroupCache*", 100000 * CACHE_SIZE_FACTOR + "*stateGroupCache*", 500000 * get_cache_factor_for("stateGroupCache") ) @cached(max_entries=100000, iterable=True) @@ -272,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): for typ in types: if typ[1] is None: where_clauses.append("(type = ?)") - where_args.extend(typ[0]) + where_args.append(typ[0]) wildcard_types = True else: where_clauses.append("(type = ? AND state_key = ?)") @@ -526,10 +526,23 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): - """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + """Gets the state at each of a list of state groups, optionally + filtering by type/state_key + + Args: + groups (iterable[int]): list of state groups for which we want + to get the state. + types (None|iterable[(str, None|str)]): + indicates the state type/keys required. If None, the whole + state is fetched and returned. + + Otherwise, each entry should be a `(type, state_key)` tuple to + include in the response. A `state_key` of None is a wildcard + meaning that we require all state with that type. + + Returns: + Deferred[dict[int, dict[(type, state_key), EventBase]]] + a dictionary mapping from state group to state dictionary. """ if types: types = frozenset(types) @@ -538,7 +551,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types + group, types, ) results[group] = state_dict_ids @@ -559,26 +572,40 @@ class StateGroupWorkerStore(SQLBaseStore): # Okay, so we have some missing_types, lets fetch them. cache_seq_num = self._state_group_cache.sequence + # the DictionaryCache knows if it has *all* the state, but + # does not know if it has all of the keys of a particular type, + # which makes wildcard lookups expensive unless we have a complete + # cache. Hence, if we are doing a wildcard lookup, populate the + # cache fully so that we can do an efficient lookup next time. + + if types and any(k is None for (t, k) in types): + types_to_fetch = None + else: + types_to_fetch = types + group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types + missing_groups, types_to_fetch, ) - # Now we want to update the cache with all the things we fetched - # from the database. for group, group_state_dict in iteritems(group_to_state_dict): state_dict = results[group] - state_dict.update( - ((intern_string(k[0]), intern_string(k[1])), to_ascii(v)) - for k, v in iteritems(group_state_dict) - ) + # update the result, filtering by `types`. + if types: + for k, v in iteritems(group_state_dict): + (typ, _) = k + if k in types or (typ, None) in types: + state_dict[k] = v + else: + state_dict.update(group_state_dict) + # update the cache with all the things we fetched from the + # database. self._state_group_cache.update( cache_seq_num, key=group, - value=state_dict, - full=(types is None), - known_absent=types, + value=group_state_dict, + fetched_keys=types_to_fetch, ) defer.returnValue(results) @@ -685,7 +712,6 @@ class StateGroupWorkerStore(SQLBaseStore): self._state_group_cache.sequence, key=state_group, value=dict(current_state_ids), - full=True, ) return state_group diff --git a/synapse/storage/tags.py b/synapse/storage/tags.py index 6671d3cfca..04d123ed95 100644 --- a/synapse/storage/tags.py +++ b/synapse/storage/tags.py @@ -19,7 +19,8 @@ from synapse.storage.account_data import AccountDataWorkerStore from synapse.util.caches.descriptors import cached from twisted.internet import defer -import simplejson as json +from canonicaljson import json + import logging from six.moves import range diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index e485d19b84..acbc03446e 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -19,12 +19,11 @@ from synapse.util.caches.descriptors import cached from twisted.internet import defer import six -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json from collections import namedtuple import logging -import simplejson as json # py2 sqlite has buffer hardcoded as only binary type, so we must use it, # despite being deprecated and removed in favor of memoryview diff --git a/synapse/storage/user_erasure_store.py b/synapse/storage/user_erasure_store.py new file mode 100644 index 0000000000..47bfc01e84 --- /dev/null +++ b/synapse/storage/user_erasure_store.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import operator + +from twisted.internet import defer + +from synapse.storage._base import SQLBaseStore +from synapse.util.caches.descriptors import cachedList, cached + + +class UserErasureWorkerStore(SQLBaseStore): + @cached() + def is_user_erased(self, user_id): + """ + Check if the given user id has requested erasure + + Args: + user_id (str): full user id to check + + Returns: + Deferred[bool]: True if the user has requested erasure + """ + return self._simple_select_onecol( + table="erased_users", + keyvalues={"user_id": user_id}, + retcol="1", + desc="is_user_erased", + ).addCallback(operator.truth) + + @cachedList( + cached_method_name="is_user_erased", + list_name="user_ids", + inlineCallbacks=True, + ) + def are_users_erased(self, user_ids): + """ + Checks which users in a list have requested erasure + + Args: + user_ids (iterable[str]): full user id to check + + Returns: + Deferred[dict[str, bool]]: + for each user, whether the user has requested erasure. + """ + # this serves the dual purpose of (a) making sure we can do len and + # iterate it multiple times, and (b) avoiding duplicates. + user_ids = tuple(set(user_ids)) + + def _get_erased_users(txn): + txn.execute( + "SELECT user_id FROM erased_users WHERE user_id IN (%s)" % ( + ",".join("?" * len(user_ids)) + ), + user_ids, + ) + return set(r[0] for r in txn) + + erased_users = yield self.runInteraction( + "are_users_erased", _get_erased_users, + ) + res = dict((u, u in erased_users) for u in user_ids) + defer.returnValue(res) + + +class UserErasureStore(UserErasureWorkerStore): + def mark_user_erased(self, user_id): + """Indicate that user_id wishes their message history to be erased. + + Args: + user_id (str): full user_id to be erased + """ + def f(txn): + # first check if they are already in the list + txn.execute( + "SELECT 1 FROM erased_users WHERE user_id = ?", + (user_id, ) + ) + if txn.fetchone(): + return + + # they are not already there: do the insert. + txn.execute( + "INSERT INTO erased_users (user_id) VALUES (?)", + (user_id, ) + ) + + self._invalidate_cache_and_stream( + txn, self.is_user_erased, (user_id,) + ) + return self.runInteraction("mark_user_erased", f) diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index fc11e26623..e9886ef299 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.logcontext import PreserveLoggingContext - -from twisted.internet import defer, reactor, task - -import time import logging - from itertools import islice +import attr +from twisted.internet import defer, task + +from synapse.util.logcontext import PreserveLoggingContext + logger = logging.getLogger(__name__) @@ -31,16 +30,27 @@ def unwrapFirstError(failure): return failure.value.subFailure +@attr.s class Clock(object): - """A small utility that obtains current time-of-day so that time may be - mocked during unit-tests. - - TODO(paul): Also move the sleep() functionality into it """ + A Clock wraps a Twisted reactor and provides utilities on top of it. + + Args: + reactor: The Twisted reactor to use. + """ + _reactor = attr.ib() + + @defer.inlineCallbacks + def sleep(self, seconds): + d = defer.Deferred() + with PreserveLoggingContext(): + self._reactor.callLater(seconds, d.callback, seconds) + res = yield d + defer.returnValue(res) def time(self): """Returns the current system time in seconds since epoch.""" - return time.time() + return self._reactor.seconds() def time_msec(self): """Returns the current system time in miliseconds since epoch.""" @@ -56,6 +66,7 @@ class Clock(object): msec(float): How long to wait between calls in milliseconds. """ call = task.LoopingCall(f) + call.clock = self._reactor call.start(msec / 1000.0, now=False) return call @@ -73,7 +84,7 @@ class Clock(object): callback(*args, **kwargs) with PreserveLoggingContext(): - return reactor.callLater(delay, wrapped_callback, *args, **kwargs) + return self._reactor.callLater(delay, wrapped_callback, *args, **kwargs) def cancel_call_later(self, timer, ignore_errs=False): try: diff --git a/synapse/util/async.py b/synapse/util/async.py index 9dd4e6b5bc..1668df4ce6 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. - -from twisted.internet import defer, reactor +from twisted.internet import defer from twisted.internet.defer import CancelledError from twisted.python import failure from .logcontext import ( PreserveLoggingContext, make_deferred_yieldable, run_in_background ) -from synapse.util import logcontext, unwrapFirstError +from synapse.util import logcontext, unwrapFirstError, Clock from contextlib import contextmanager @@ -32,22 +31,6 @@ from six.moves import range logger = logging.getLogger(__name__) -@defer.inlineCallbacks -def sleep(seconds): - d = defer.Deferred() - with PreserveLoggingContext(): - reactor.callLater(seconds, d.callback, seconds) - res = yield d - defer.returnValue(res) - - -def run_on_reactor(): - """ This will cause the rest of the function to be invoked upon the next - iteration of the main loop - """ - return sleep(0) - - class ObservableDeferred(object): """Wraps a deferred object so that we can add observer deferreds. These observer deferreds do not affect the callback chain of the original @@ -180,13 +163,18 @@ class Linearizer(object): # do some work. """ - def __init__(self, name=None): + def __init__(self, name=None, clock=None): if name is None: self.name = id(self) else: self.name = name self.key_to_defer = {} + if not clock: + from twisted.internet import reactor + clock = Clock(reactor) + self._clock = clock + @defer.inlineCallbacks def queue(self, key): # If there is already a deferred in the queue, we pull it out so that @@ -227,7 +215,7 @@ class Linearizer(object): # the context manager, but it needs to happen while we hold the # lock, and the context manager's exit code must be synchronous, # so actually this is the only sensible place. - yield run_on_reactor() + yield self._clock.sleep(0) else: logger.info("Acquired uncontended linearizer lock %r for key %r", @@ -404,7 +392,7 @@ class DeferredTimeoutError(Exception): """ -def add_timeout_to_deferred(deferred, timeout, on_timeout_cancel=None): +def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): """ Add a timeout to a deferred by scheduling it to be cancelled after timeout seconds. @@ -419,6 +407,7 @@ def add_timeout_to_deferred(deferred, timeout, on_timeout_cancel=None): Args: deferred (defer.Deferred): deferred to be timed out timeout (Number): seconds to time out after + reactor (twisted.internet.reactor): the Twisted reactor to use on_timeout_cancel (callable): A callable which is called immediately after the deferred times out, and not if this deferred is diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 183faf75a1..900575eb3c 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -22,6 +22,16 @@ import six CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) + +def get_cache_factor_for(cache_name): + env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper() + factor = os.environ.get(env_var) + if factor: + return float(factor) + + return CACHE_SIZE_FACTOR + + caches_by_name = {} collectors_by_name = {} diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index fc1874b65b..65a1042de1 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -17,7 +17,7 @@ import logging from synapse.util.async import ObservableDeferred from synapse.util import unwrapFirstError, logcontext -from synapse.util.caches import CACHE_SIZE_FACTOR +from synapse.util.caches import get_cache_factor_for from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry from synapse.util.stringutils import to_ascii @@ -313,7 +313,7 @@ class CacheDescriptor(_CacheDescriptorBase): orig, num_args=num_args, inlineCallbacks=inlineCallbacks, cache_context=cache_context) - max_entries = int(max_entries * CACHE_SIZE_FACTOR) + max_entries = int(max_entries * get_cache_factor_for(orig.__name__)) self.max_entries = max_entries self.tree = tree diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index bdc21e348f..95793d466d 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -107,29 +107,28 @@ class DictionaryCache(object): self.sequence += 1 self.cache.clear() - def update(self, sequence, key, value, full=False, known_absent=None): + def update(self, sequence, key, value, fetched_keys=None): """Updates the entry in the cache Args: sequence - key - value (dict): The value to update the cache with. - full (bool): Whether the given value is the full dict, or just a - partial subset there of. If not full then any existing entries - for the key will be updated. - known_absent (set): Set of keys that we know don't exist in the full - dict. + key (K) + value (dict[X,Y]): The value to update the cache with. + fetched_keys (None|set[X]): All of the dictionary keys which were + fetched from the database. + + If None, this is the complete value for key K. Otherwise, it + is used to infer a list of keys which we know don't exist in + the full dict. """ self.check_thread() if self.sequence == sequence: # Only update the cache if the caches sequence number matches the # number that the cache had before the SELECT was started (SYN-369) - if known_absent is None: - known_absent = set() - if full: - self._insert(key, value, known_absent) + if fetched_keys is None: + self._insert(key, value, set()) else: - self._update_or_insert(key, value, known_absent) + self._update_or_insert(key, value, fetched_keys) def _update_or_insert(self, key, value, known_absent): # We pop and reinsert as we need to tell the cache the size may have diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index a7fe0397fa..0fb8620001 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.caches import register_cache, CACHE_SIZE_FACTOR +from synapse.util import caches -from blist import sorteddict +from sortedcontainers import SortedDict import logging @@ -32,16 +32,18 @@ class StreamChangeCache(object): entities that may have changed since that position. If position key is too old then the cache will simply return all given entities. """ - def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache={}): - self._max_size = int(max_size * CACHE_SIZE_FACTOR) + + def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None): + self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR) self._entity_to_key = {} - self._cache = sorteddict() + self._cache = SortedDict() self._earliest_known_stream_pos = current_stream_pos self.name = name - self.metrics = register_cache("cache", self.name, self._cache) + self.metrics = caches.register_cache("cache", self.name, self._cache) - for entity, stream_pos in prefilled_cache.items(): - self.entity_has_changed(entity, stream_pos) + if prefilled_cache: + for entity, stream_pos in prefilled_cache.items(): + self.entity_has_changed(entity, stream_pos) def has_entity_changed(self, entity, stream_pos): """Returns True if the entity may have been updated since stream_pos @@ -65,22 +67,26 @@ class StreamChangeCache(object): return False def get_entities_changed(self, entities, stream_pos): - """Returns subset of entities that have had new things since the - given position. If the position is too old it will just return the given list. + """ + Returns subset of entities that have had new things since the given + position. Entities unknown to the cache will be returned. If the + position is too old it will just return the given list. """ assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - keys = self._cache.keys() - i = keys.bisect_right(stream_pos) + not_known_entities = set(entities) - set(self._entity_to_key) - result = set( - self._cache[k] for k in keys[i:] - ).intersection(entities) + result = ( + {self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos))} + .intersection(entities) + .union(not_known_entities) + ) self.metrics.inc_hits() else: - result = entities + result = set(entities) self.metrics.inc_misses() return result @@ -90,12 +96,13 @@ class StreamChangeCache(object): """ assert type(stream_pos) is int + if not self._cache: + # If we have no cache, nothing can have changed. + return False + if stream_pos >= self._earliest_known_stream_pos: self.metrics.inc_hits() - keys = self._cache.keys() - i = keys.bisect_right(stream_pos) - - return i < len(keys) + return self._cache.bisect_right(stream_pos) < len(self._cache) else: self.metrics.inc_misses() return True @@ -107,10 +114,8 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - keys = self._cache.keys() - i = keys.bisect_right(stream_pos) - - return [self._cache[k] for k in keys[i:]] + return [self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos))] else: return None @@ -129,8 +134,10 @@ class StreamChangeCache(object): self._entity_to_key[entity] = stream_pos while len(self._cache) > self._max_size: - k, r = self._cache.popitem() - self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos) + k, r = self._cache.popitem(0) + self._earliest_known_stream_pos = max( + k, self._earliest_known_stream_pos, + ) self._entity_to_key.pop(r, None) def get_max_pos_of_last_change(self, entity): diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index 3380970e4e..c78801015b 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import threads, reactor +from twisted.internet import threads from synapse.util.logcontext import make_deferred_yieldable, run_in_background @@ -27,6 +27,7 @@ class BackgroundFileConsumer(object): Args: file_obj (file): The file like object to write to. Closed when finished. + reactor (twisted.internet.reactor): the Twisted reactor to use """ # For PushProducers pause if we have this many unwritten slices @@ -34,9 +35,11 @@ class BackgroundFileConsumer(object): # And resume once the size of the queue is less than this _RESUME_ON_QUEUE_SIZE = 2 - def __init__(self, file_obj): + def __init__(self, file_obj, reactor): self._file_obj = file_obj + self._reactor = reactor + # Producer we're registered with self._producer = None @@ -71,7 +74,10 @@ class BackgroundFileConsumer(object): self._producer = producer self.streaming = streaming self._finished_deferred = run_in_background( - threads.deferToThread, self._writer + threads.deferToThreadPool, + self._reactor, + self._reactor.getThreadPool(), + self._writer, ) if not streaming: self._producer.resumeProducing() @@ -109,7 +115,7 @@ class BackgroundFileConsumer(object): # producer. if self._producer and self._paused_producer: if self._bytes_queue.qsize() <= self._RESUME_ON_QUEUE_SIZE: - reactor.callFromThread(self._resume_paused_producer) + self._reactor.callFromThread(self._resume_paused_producer) bytes = self._bytes_queue.get() @@ -121,7 +127,7 @@ class BackgroundFileConsumer(object): # If its a pull producer then we need to explicitly ask for # more stuff. if not self.streaming and self._producer: - reactor.callFromThread(self._producer.resumeProducing) + self._reactor.callFromThread(self._producer.resumeProducing) except Exception as e: self._write_exception = e raise diff --git a/synapse/util/frozenutils.py b/synapse/util/frozenutils.py index 15f0a7ba9e..535e7d0e7a 100644 --- a/synapse/util/frozenutils.py +++ b/synapse/util/frozenutils.py @@ -14,7 +14,7 @@ # limitations under the License. from frozendict import frozendict -import simplejson as json +from canonicaljson import json from six import string_types diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a58c723403..df2b71b791 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -60,6 +60,7 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "evt_db_fetch_count", "usage_start", "main_thread", "alive", "request", "tag", @@ -90,6 +91,9 @@ class LoggingContext(object): def add_database_scheduled(self, sched_sec): pass + def record_event_fetch(self, event_count): + pass + def __nonzero__(self): return False __bool__ = __nonzero__ # python3 @@ -109,6 +113,9 @@ class LoggingContext(object): # sec spent waiting for db txns to be scheduled self.db_sched_duration_sec = 0 + # number of events this thread has fetched from the db + self.evt_db_fetch_count = 0 + # If alive has the thread resource usage when the logcontext last # became active. self.usage_start = None @@ -243,6 +250,14 @@ class LoggingContext(object): """ self.db_sched_duration_sec += sched_sec + def record_event_fetch(self, event_count): + """Record a number of events being fetched from the db + + Args: + event_count (int): number of events being fetched + """ + self.evt_db_fetch_count += event_count + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 0ab63c3d7d..c5a45cef7c 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -17,7 +17,6 @@ from twisted.internet import defer from synapse.api.errors import LimitExceededError -from synapse.util.async import sleep from synapse.util.logcontext import ( run_in_background, make_deferred_yieldable, PreserveLoggingContext, @@ -153,7 +152,7 @@ class _PerHostRatelimiter(object): "Ratelimit [%s]: sleeping req", id(request_id), ) - ret_defer = run_in_background(sleep, self.sleep_msec / 1000.0) + ret_defer = run_in_background(self.clock.sleep, self.sleep_msec / 1000.0) self.sleeping_requests.add(request_id) diff --git a/synapse/visibility.py b/synapse/visibility.py index aaca2c584c..65d79cf0d0 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -12,15 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools +import logging +import operator from twisted.internet import defer -from synapse.api.constants import Membership, EventTypes - -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn - -import logging - +from synapse.api.constants import EventTypes, Membership +from synapse.events.utils import prune_event +from synapse.util.logcontext import ( + make_deferred_yieldable, preserve_fn, +) logger = logging.getLogger(__name__) @@ -43,21 +45,35 @@ MEMBERSHIP_PRIORITY = ( @defer.inlineCallbacks -def filter_events_for_clients(store, user_tuples, events, event_id_to_state, - always_include_ids=frozenset()): - """ Returns dict of user_id -> list of events that user is allowed to - see. +def filter_events_for_client(store, user_id, events, is_peeking=False, + always_include_ids=frozenset()): + """ + Check which events a user is allowed to see Args: - user_tuples (str, bool): (user id, is_peeking) for each user to be - checked. is_peeking should be true if: - * the user is not currently a member of the room, and: - * the user has not been a member of the room since the - given events - events ([synapse.events.EventBase]): list of events to filter + store (synapse.storage.DataStore): our datastore (can also be a worker + store) + user_id(str): user id to be checked + events(list[synapse.events.EventBase]): sequence of events to be checked + is_peeking(bool): should be True if: + * the user is not currently a member of the room, and: + * the user has not been a member of the room since the given + events always_include_ids (set(event_id)): set of event ids to specifically include (unless sender is ignored) + + Returns: + Deferred[list[synapse.events.EventBase]] """ + types = ( + (EventTypes.RoomHistoryVisibility, ""), + (EventTypes.Member, user_id), + ) + event_id_to_state = yield store.get_state_for_events( + frozenset(e.event_id for e in events), + types=types, + ) + forgotten = yield make_deferred_yieldable(defer.gatherResults([ defer.maybeDeferred( preserve_fn(store.who_forgot_in_room), @@ -71,31 +87,37 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state, row["event_id"] for rows in forgotten for row in rows ) - ignore_dict_content = yield store.get_global_account_data_by_type_for_users( - "m.ignored_user_list", user_ids=[user_id for user_id, _ in user_tuples] + ignore_dict_content = yield store.get_global_account_data_by_type_for_user( + "m.ignored_user_list", user_id, ) # FIXME: This will explode if people upload something incorrect. - ignore_dict = { - user_id: frozenset( - content.get("ignored_users", {}).keys() if content else [] - ) - for user_id, content in ignore_dict_content.items() - } + ignore_list = frozenset( + ignore_dict_content.get("ignored_users", {}).keys() + if ignore_dict_content else [] + ) - def allowed(event, user_id, is_peeking, ignore_list): + erased_senders = yield store.are_users_erased((e.sender for e in events)) + + def allowed(event): """ Args: event (synapse.events.EventBase): event to check - user_id (str) - is_peeking (bool) - ignore_list (list): list of users to ignore + + Returns: + None|EventBase: + None if the user cannot see this event at all + + a redacted copy of the event if they can only see a redacted + version + + the original event if they can see it as normal. """ if not event.is_state() and event.sender in ignore_list: - return False + return None if event.event_id in always_include_ids: - return True + return event state = event_id_to_state[event.event_id] @@ -109,10 +131,6 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state, if visibility not in VISIBILITY_PRIORITY: visibility = "shared" - # if it was world_readable, it's easy: everyone can read it - if visibility == "world_readable": - return True - # Always allow history visibility events on boundaries. This is done # by setting the effective visibility to the least restrictive # of the old vs new. @@ -146,7 +164,7 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state, if membership == "leave" and ( prev_membership == "join" or prev_membership == "invite" ): - return True + return event new_priority = MEMBERSHIP_PRIORITY.index(membership) old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) @@ -157,70 +175,55 @@ def filter_events_for_clients(store, user_tuples, events, event_id_to_state, if membership is None: membership_event = state.get((EventTypes.Member, user_id), None) if membership_event: + # XXX why do we do this? + # https://github.com/matrix-org/synapse/issues/3350 if membership_event.event_id not in event_id_forgotten: membership = membership_event.membership # if the user was a member of the room at the time of the event, # they can see it. if membership == Membership.JOIN: - return True + return event + + # otherwise, it depends on the room visibility. if visibility == "joined": # we weren't a member at the time of the event, so we can't # see this event. - return False + return None elif visibility == "invited": # user can also see the event if they were *invited* at the time # of the event. - return membership == Membership.INVITE + return ( + event if membership == Membership.INVITE else None + ) - else: - # visibility is shared: user can also see the event if they have - # become a member since the event + elif visibility == "shared" and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subequently* joined the room (or were members at the + # time, of course) # # XXX: if the user has subsequently joined and then left again, # ideally we would share history up to the point they left. But - # we don't know when they left. - return not is_peeking + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return None - defer.returnValue({ - user_id: [ - event - for event in events - if allowed(event, user_id, is_peeking, ignore_dict.get(user_id, [])) - ] - for user_id, is_peeking in user_tuples - }) + # the visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it, provided the original sender + # has not requested their data to be erased, in which case, we return + # a redacted version. + if erased_senders[event.sender]: + return prune_event(event) + return event -@defer.inlineCallbacks -def filter_events_for_client(store, user_id, events, is_peeking=False, - always_include_ids=frozenset()): - """ - Check which events a user is allowed to see + # check each event: gives an iterable[None|EventBase] + filtered_events = itertools.imap(allowed, events) - Args: - user_id(str): user id to be checked - events([synapse.events.EventBase]): list of events to be checked - is_peeking(bool): should be True if: - * the user is not currently a member of the room, and: - * the user has not been a member of the room since the given - events + # remove the None entries + filtered_events = filter(operator.truth, filtered_events) - Returns: - [synapse.events.EventBase] - """ - types = ( - (EventTypes.RoomHistoryVisibility, ""), - (EventTypes.Member, user_id), - ) - event_id_to_state = yield store.get_state_for_events( - frozenset(e.event_id for e in events), - types=types - ) - res = yield filter_events_for_clients( - store, [(user_id, is_peeking)], events, event_id_to_state, - always_include_ids=always_include_ids, - ) - defer.returnValue(res.get(user_id, [])) + # we turn it into a list before returning it. + defer.returnValue(list(filtered_events)) diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index 4575dd9834..aec3b62897 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -86,16 +86,53 @@ class AuthTestCase(unittest.TestCase): @defer.inlineCallbacks def test_get_user_by_req_appservice_valid_token(self): - app_service = Mock(token="foobar", url="a_url", sender=self.test_user) + app_service = Mock( + token="foobar", url="a_url", sender=self.test_user, + ip_range_whitelist=None, + ) self.store.get_app_service_by_token = Mock(return_value=app_service) self.store.get_user_by_access_token = Mock(return_value=None) request = Mock(args={}) + request.getClientIP.return_value = "127.0.0.1" request.args["access_token"] = [self.test_token] request.requestHeaders.getRawHeaders = mock_getRawHeaders() requester = yield self.auth.get_user_by_req(request) self.assertEquals(requester.user.to_string(), self.test_user) + @defer.inlineCallbacks + def test_get_user_by_req_appservice_valid_token_good_ip(self): + from netaddr import IPSet + app_service = Mock( + token="foobar", url="a_url", sender=self.test_user, + ip_range_whitelist=IPSet(["192.168/16"]), + ) + self.store.get_app_service_by_token = Mock(return_value=app_service) + self.store.get_user_by_access_token = Mock(return_value=None) + + request = Mock(args={}) + request.getClientIP.return_value = "192.168.10.10" + request.args["access_token"] = [self.test_token] + request.requestHeaders.getRawHeaders = mock_getRawHeaders() + requester = yield self.auth.get_user_by_req(request) + self.assertEquals(requester.user.to_string(), self.test_user) + + def test_get_user_by_req_appservice_valid_token_bad_ip(self): + from netaddr import IPSet + app_service = Mock( + token="foobar", url="a_url", sender=self.test_user, + ip_range_whitelist=IPSet(["192.168/16"]), + ) + self.store.get_app_service_by_token = Mock(return_value=app_service) + self.store.get_user_by_access_token = Mock(return_value=None) + + request = Mock(args={}) + request.getClientIP.return_value = "131.111.8.42" + request.args["access_token"] = [self.test_token] + request.requestHeaders.getRawHeaders = mock_getRawHeaders() + d = self.auth.get_user_by_req(request) + self.failureResultOf(d, AuthError) + def test_get_user_by_req_appservice_bad_token(self): self.store.get_app_service_by_token = Mock(return_value=None) self.store.get_user_by_access_token = Mock(return_value=None) @@ -119,12 +156,16 @@ class AuthTestCase(unittest.TestCase): @defer.inlineCallbacks def test_get_user_by_req_appservice_valid_token_valid_user_id(self): masquerading_user_id = "@doppelganger:matrix.org" - app_service = Mock(token="foobar", url="a_url", sender=self.test_user) + app_service = Mock( + token="foobar", url="a_url", sender=self.test_user, + ip_range_whitelist=None, + ) app_service.is_interested_in_user = Mock(return_value=True) self.store.get_app_service_by_token = Mock(return_value=app_service) self.store.get_user_by_access_token = Mock(return_value=None) request = Mock(args={}) + request.getClientIP.return_value = "127.0.0.1" request.args["access_token"] = [self.test_token] request.args["user_id"] = [masquerading_user_id] request.requestHeaders.getRawHeaders = mock_getRawHeaders() @@ -133,12 +174,16 @@ class AuthTestCase(unittest.TestCase): def test_get_user_by_req_appservice_valid_token_bad_user_id(self): masquerading_user_id = "@doppelganger:matrix.org" - app_service = Mock(token="foobar", url="a_url", sender=self.test_user) + app_service = Mock( + token="foobar", url="a_url", sender=self.test_user, + ip_range_whitelist=None, + ) app_service.is_interested_in_user = Mock(return_value=False) self.store.get_app_service_by_token = Mock(return_value=app_service) self.store.get_user_by_access_token = Mock(return_value=None) request = Mock(args={}) + request.getClientIP.return_value = "127.0.0.1" request.args["access_token"] = [self.test_token] request.args["user_id"] = [masquerading_user_id] request.requestHeaders.getRawHeaders = mock_getRawHeaders() diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 149e443022..cc1c862ba4 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -19,10 +19,10 @@ import signedjson.sign from mock import Mock from synapse.api.errors import SynapseError from synapse.crypto import keyring -from synapse.util import async, logcontext +from synapse.util import logcontext, Clock from synapse.util.logcontext import LoggingContext from tests import unittest, utils -from twisted.internet import defer +from twisted.internet import defer, reactor class MockPerspectiveServer(object): @@ -118,6 +118,7 @@ class KeyringTestCase(unittest.TestCase): @defer.inlineCallbacks def test_verify_json_objects_for_server_awaits_previous_requests(self): + clock = Clock(reactor) key1 = signedjson.key.generate_signing_key(1) kr = keyring.Keyring(self.hs) @@ -167,7 +168,7 @@ class KeyringTestCase(unittest.TestCase): # wait a tick for it to send the request to the perspectives server # (it first tries the datastore) - yield async.sleep(1) # XXX find out why this takes so long! + yield clock.sleep(1) # XXX find out why this takes so long! self.http_client.post_json.assert_called_once() self.assertIs(LoggingContext.current_context(), context_11) @@ -183,7 +184,7 @@ class KeyringTestCase(unittest.TestCase): res_deferreds_2 = kr.verify_json_objects_for_server( [("server10", json1)], ) - yield async.sleep(1) + yield clock.sleep(1) self.http_client.post_json.assert_not_called() res_deferreds_2[0].addBoth(self.check_context, None) diff --git a/tests/federation/__init__.py b/tests/federation/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py new file mode 100644 index 0000000000..4e8dc8fea0 --- /dev/null +++ b/tests/federation/test_federation_server.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from synapse.events import FrozenEvent +from synapse.federation.federation_server import server_matches_acl_event +from tests import unittest + + +@unittest.DEBUG +class ServerACLsTestCase(unittest.TestCase): + def test_blacklisted_server(self): + e = _create_acl_event({ + "allow": ["*"], + "deny": ["evil.com"], + }) + logging.info("ACL event: %s", e.content) + + self.assertFalse(server_matches_acl_event("evil.com", e)) + self.assertFalse(server_matches_acl_event("EVIL.COM", e)) + + self.assertTrue(server_matches_acl_event("evil.com.au", e)) + self.assertTrue(server_matches_acl_event("honestly.not.evil.com", e)) + + def test_block_ip_literals(self): + e = _create_acl_event({ + "allow_ip_literals": False, + "allow": ["*"], + }) + logging.info("ACL event: %s", e.content) + + self.assertFalse(server_matches_acl_event("1.2.3.4", e)) + self.assertTrue(server_matches_acl_event("1a.2.3.4", e)) + self.assertFalse(server_matches_acl_event("[1:2::]", e)) + self.assertTrue(server_matches_acl_event("1:2:3:4", e)) + + +def _create_acl_event(content): + return FrozenEvent({ + "room_id": "!a:b", + "event_id": "$a:b", + "type": "m.room.server_acls", + "sender": "@a:b", + "content": content + }) diff --git a/tests/http/__init__.py b/tests/http/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/http/test_endpoint.py b/tests/http/test_endpoint.py new file mode 100644 index 0000000000..b8a48d20a4 --- /dev/null +++ b/tests/http/test_endpoint.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.http.endpoint import ( + parse_server_name, + parse_and_validate_server_name, +) +from tests import unittest + + +class ServerNameTestCase(unittest.TestCase): + def test_parse_server_name(self): + test_data = { + 'localhost': ('localhost', None), + 'my-example.com:1234': ('my-example.com', 1234), + '1.2.3.4': ('1.2.3.4', None), + '[0abc:1def::1234]': ('[0abc:1def::1234]', None), + '1.2.3.4:1': ('1.2.3.4', 1), + '[0abc:1def::1234]:8080': ('[0abc:1def::1234]', 8080), + } + + for i, o in test_data.items(): + self.assertEqual(parse_server_name(i), o) + + def test_validate_bad_server_names(self): + test_data = [ + "", # empty + "localhost:http", # non-numeric port + "1234]", # smells like ipv6 literal but isn't + "[1234", + "underscore_.com", + "percent%65.com", + "1234:5678:80", # too many colons + ] + for i in test_data: + try: + parse_and_validate_server_name(i) + self.fail( + "Expected parse_and_validate_server_name('%s') to throw" % ( + i, + ), + ) + except ValueError: + pass diff --git a/tests/replication/slave/storage/test_account_data.py b/tests/replication/slave/storage/test_account_data.py index da54d478ce..f47a42e45d 100644 --- a/tests/replication/slave/storage/test_account_data.py +++ b/tests/replication/slave/storage/test_account_data.py @@ -37,10 +37,6 @@ class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase): "get_global_account_data_by_type_for_user", [TYPE, USER_ID], {"a": 1} ) - yield self.check( - "get_global_account_data_by_type_for_users", - [TYPE, [USER_ID]], {USER_ID: {"a": 1}} - ) yield self.master_store.add_account_data_for_user( USER_ID, TYPE, {"a": 2} @@ -50,7 +46,3 @@ class SlavedAccountDataStoreTestCase(BaseSlavedStoreTestCase): "get_global_account_data_by_type_for_user", [TYPE, USER_ID], {"a": 2} ) - yield self.check( - "get_global_account_data_by_type_for_users", - [TYPE, [USER_ID]], {USER_ID: {"a": 2}} - ) diff --git a/tests/rest/client/test_transactions.py b/tests/rest/client/test_transactions.py index b5bc2fa255..6a757289db 100644 --- a/tests/rest/client/test_transactions.py +++ b/tests/rest/client/test_transactions.py @@ -1,9 +1,9 @@ from synapse.rest.client.transactions import HttpTransactionCache from synapse.rest.client.transactions import CLEANUP_PERIOD_MS -from twisted.internet import defer +from twisted.internet import defer, reactor from mock import Mock, call -from synapse.util import async +from synapse.util import Clock from synapse.util.logcontext import LoggingContext from tests import unittest from tests.utils import MockClock @@ -46,7 +46,7 @@ class HttpTransactionCacheTestCase(unittest.TestCase): def test_logcontexts_with_async_result(self): @defer.inlineCallbacks def cb(): - yield async.sleep(0) + yield Clock(reactor).sleep(0) defer.returnValue("yay") @defer.inlineCallbacks diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index eef38b6781..c5e2f5549a 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -14,7 +14,7 @@ # limitations under the License. -from twisted.internet import defer +from twisted.internet import defer, reactor from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import MediaStorage @@ -38,6 +38,7 @@ class MediaStorageTests(unittest.TestCase): self.secondary_base_path = os.path.join(self.test_dir, "secondary") hs = Mock() + hs.get_reactor = Mock(return_value=reactor) hs.config.media_store_path = self.primary_base_path storage_providers = [FileStorageProviderBackend( @@ -46,7 +47,7 @@ class MediaStorageTests(unittest.TestCase): self.filepaths = MediaFilePaths(self.primary_base_path) self.media_storage = MediaStorage( - self.primary_base_path, self.filepaths, storage_providers, + hs, self.primary_base_path, self.filepaths, storage_providers, ) def tearDown(self): diff --git a/tests/server.py b/tests/server.py new file mode 100644 index 0000000000..73069dff52 --- /dev/null +++ b/tests/server.py @@ -0,0 +1,181 @@ +from io import BytesIO + +import attr +import json +from six import text_type + +from twisted.python.failure import Failure +from twisted.internet.defer import Deferred +from twisted.test.proto_helpers import MemoryReactorClock + +from synapse.http.site import SynapseRequest +from twisted.internet import threads +from tests.utils import setup_test_homeserver as _sth + + +@attr.s +class FakeChannel(object): + """ + A fake Twisted Web Channel (the part that interfaces with the + wire). + """ + + result = attr.ib(factory=dict) + + @property + def json_body(self): + if not self.result: + raise Exception("No result yet.") + return json.loads(self.result["body"]) + + def writeHeaders(self, version, code, reason, headers): + self.result["version"] = version + self.result["code"] = code + self.result["reason"] = reason + self.result["headers"] = headers + + def write(self, content): + if "body" not in self.result: + self.result["body"] = b"" + + self.result["body"] += content + + def requestDone(self, _self): + self.result["done"] = True + + def getPeer(self): + return None + + def getHost(self): + return None + + @property + def transport(self): + return self + + +class FakeSite: + """ + A fake Twisted Web Site, with mocks of the extra things that + Synapse adds. + """ + + server_version_string = b"1" + site_tag = "test" + + @property + def access_logger(self): + class FakeLogger: + def info(self, *args, **kwargs): + pass + + return FakeLogger() + + +def make_request(method, path, content=b""): + """ + Make a web request using the given method and path, feed it the + content, and return the Request and the Channel underneath. + """ + + if isinstance(content, text_type): + content = content.encode('utf8') + + site = FakeSite() + channel = FakeChannel() + + req = SynapseRequest(site, channel) + req.process = lambda: b"" + req.content = BytesIO(content) + req.requestReceived(method, path, b"1.1") + + return req, channel + + +def wait_until_result(clock, channel, timeout=100): + """ + Wait until the channel has a result. + """ + clock.run() + x = 0 + + while not channel.result: + x += 1 + + if x > timeout: + raise Exception("Timed out waiting for request to finish.") + + clock.advance(0.1) + + +class ThreadedMemoryReactorClock(MemoryReactorClock): + """ + A MemoryReactorClock that supports callFromThread. + """ + def callFromThread(self, callback, *args, **kwargs): + """ + Make the callback fire in the next reactor iteration. + """ + d = Deferred() + d.addCallback(lambda x: callback(*args, **kwargs)) + self.callLater(0, d.callback, True) + return d + + +def setup_test_homeserver(*args, **kwargs): + """ + Set up a synchronous test server, driven by the reactor used by + the homeserver. + """ + d = _sth(*args, **kwargs).result + + # Make the thread pool synchronous. + clock = d.get_clock() + pool = d.get_db_pool() + + def runWithConnection(func, *args, **kwargs): + return threads.deferToThreadPool( + pool._reactor, + pool.threadpool, + pool._runWithConnection, + func, + *args, + **kwargs + ) + + def runInteraction(interaction, *args, **kwargs): + return threads.deferToThreadPool( + pool._reactor, + pool.threadpool, + pool._runInteraction, + interaction, + *args, + **kwargs + ) + + pool.runWithConnection = runWithConnection + pool.runInteraction = runInteraction + + class ThreadPool: + """ + Threadless thread pool. + """ + def start(self): + pass + + def callInThreadWithCallback(self, onResult, function, *args, **kwargs): + def _(res): + if isinstance(res, Failure): + onResult(False, res) + else: + onResult(True, res) + + d = Deferred() + d.addCallback(lambda x: function(*args, **kwargs)) + d.addBoth(_) + clock._reactor.callLater(0, d.callback, True) + return d + + clock.threadpool = ThreadPool() + pool.threadpool = ThreadPool() + return d diff --git a/tests/test_distributor.py b/tests/test_distributor.py index 010aeaee7e..c066381698 100644 --- a/tests/test_distributor.py +++ b/tests/test_distributor.py @@ -19,7 +19,6 @@ from twisted.internet import defer from mock import Mock, patch from synapse.util.distributor import Distributor -from synapse.util.async import run_on_reactor class DistributorTestCase(unittest.TestCase): @@ -95,7 +94,6 @@ class DistributorTestCase(unittest.TestCase): @defer.inlineCallbacks def observer(): - yield run_on_reactor() raise MyException("Oopsie") self.dist.observe("whail", observer) diff --git a/tests/test_federation.py b/tests/test_federation.py new file mode 100644 index 0000000000..fc80a69369 --- /dev/null +++ b/tests/test_federation.py @@ -0,0 +1,243 @@ + +from twisted.internet.defer import succeed, maybeDeferred + +from synapse.util import Clock +from synapse.events import FrozenEvent +from synapse.types import Requester, UserID + +from tests import unittest +from tests.server import setup_test_homeserver, ThreadedMemoryReactorClock + +from mock import Mock + + +class MessageAcceptTests(unittest.TestCase): + def setUp(self): + + self.http_client = Mock() + self.reactor = ThreadedMemoryReactorClock() + self.hs_clock = Clock(self.reactor) + self.homeserver = setup_test_homeserver( + http_client=self.http_client, clock=self.hs_clock, reactor=self.reactor + ) + + user_id = UserID("us", "test") + our_user = Requester(user_id, None, False, None, None) + room_creator = self.homeserver.get_room_creation_handler() + room = room_creator.create_room( + our_user, room_creator.PRESETS_DICT["public_chat"], ratelimit=False + ) + self.reactor.advance(0.1) + self.room_id = self.successResultOf(room)["room_id"] + + # Figure out what the most recent event is + most_recent = self.successResultOf( + maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + )[0] + + join_event = FrozenEvent( + { + "room_id": self.room_id, + "sender": "@baduser:test.serv", + "state_key": "@baduser:test.serv", + "event_id": "$join:test.serv", + "depth": 1000, + "origin_server_ts": 1, + "type": "m.room.member", + "origin": "test.servx", + "content": {"membership": "join"}, + "auth_events": [], + "prev_state": [(most_recent, {})], + "prev_events": [(most_recent, {})], + } + ) + + self.handler = self.homeserver.get_handlers().federation_handler + self.handler.do_auth = lambda *a, **b: succeed(True) + self.client = self.homeserver.get_federation_client() + self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed( + pdus + ) + + # Send the join, it should return None (which is not an error) + d = self.handler.on_receive_pdu( + "test.serv", join_event, sent_to_us_directly=True + ) + self.reactor.advance(1) + self.assertEqual(self.successResultOf(d), None) + + # Make sure we actually joined the room + self.assertEqual( + self.successResultOf( + maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + )[0], + "$join:test.serv", + ) + + def test_cant_hide_direct_ancestors(self): + """ + If you send a message, you must be able to provide the direct + prev_events that said event references. + """ + + def post_json(destination, path, data, headers=None, timeout=0): + # If it asks us for new missing events, give them NOTHING + if path.startswith("/_matrix/federation/v1/get_missing_events/"): + return {"events": []} + + self.http_client.post_json = post_json + + # Figure out what the most recent event is + most_recent = self.successResultOf( + maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + )[0] + + # Now lie about an event + lying_event = FrozenEvent( + { + "room_id": self.room_id, + "sender": "@baduser:test.serv", + "event_id": "one:test.serv", + "depth": 1000, + "origin_server_ts": 1, + "type": "m.room.message", + "origin": "test.serv", + "content": "hewwo?", + "auth_events": [], + "prev_events": [("two:test.serv", {}), (most_recent, {})], + } + ) + + d = self.handler.on_receive_pdu( + "test.serv", lying_event, sent_to_us_directly=True + ) + + # Step the reactor, so the database fetches come back + self.reactor.advance(1) + + # on_receive_pdu should throw an error + failure = self.failureResultOf(d) + self.assertEqual( + failure.value.args[0], + ( + "ERROR 403: Your server isn't divulging details about prev_events " + "referenced in this event." + ), + ) + + # Make sure the invalid event isn't there + extrem = maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + self.assertEqual(self.successResultOf(extrem)[0], "$join:test.serv") + + @unittest.DEBUG + def test_cant_hide_past_history(self): + """ + If you send a message, you must be able to provide the direct + prev_events that said event references. + """ + + def post_json(destination, path, data, headers=None, timeout=0): + if path.startswith("/_matrix/federation/v1/get_missing_events/"): + return { + "events": [ + { + "room_id": self.room_id, + "sender": "@baduser:test.serv", + "event_id": "three:test.serv", + "depth": 1000, + "origin_server_ts": 1, + "type": "m.room.message", + "origin": "test.serv", + "content": "hewwo?", + "auth_events": [], + "prev_events": [("four:test.serv", {})], + } + ] + } + + self.http_client.post_json = post_json + + def get_json(destination, path, args, headers=None): + if path.startswith("/_matrix/federation/v1/state_ids/"): + d = self.successResultOf( + self.homeserver.datastore.get_state_ids_for_event("one:test.serv") + ) + + return succeed( + { + "pdu_ids": [ + y + for x, y in d.items() + if x == ("m.room.member", "@us:test") + ], + "auth_chain_ids": d.values(), + } + ) + + self.http_client.get_json = get_json + + # Figure out what the most recent event is + most_recent = self.successResultOf( + maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + )[0] + + # Make a good event + good_event = FrozenEvent( + { + "room_id": self.room_id, + "sender": "@baduser:test.serv", + "event_id": "one:test.serv", + "depth": 1000, + "origin_server_ts": 1, + "type": "m.room.message", + "origin": "test.serv", + "content": "hewwo?", + "auth_events": [], + "prev_events": [(most_recent, {})], + } + ) + + d = self.handler.on_receive_pdu( + "test.serv", good_event, sent_to_us_directly=True + ) + self.reactor.advance(1) + self.assertEqual(self.successResultOf(d), None) + + bad_event = FrozenEvent( + { + "room_id": self.room_id, + "sender": "@baduser:test.serv", + "event_id": "two:test.serv", + "depth": 1000, + "origin_server_ts": 1, + "type": "m.room.message", + "origin": "test.serv", + "content": "hewwo?", + "auth_events": [], + "prev_events": [("one:test.serv", {}), ("three:test.serv", {})], + } + ) + + d = self.handler.on_receive_pdu( + "test.serv", bad_event, sent_to_us_directly=True + ) + self.reactor.advance(1) + + extrem = maybeDeferred( + self.homeserver.datastore.get_latest_event_ids_in_room, self.room_id + ) + self.assertEqual(self.successResultOf(extrem)[0], "two:test.serv") + + state = self.homeserver.get_state_handler().get_current_state_ids(self.room_id) + self.reactor.advance(1) + self.assertIn(("m.room.member", "@us:test"), self.successResultOf(state).keys()) diff --git a/tests/test_server.py b/tests/test_server.py new file mode 100644 index 0000000000..8ad822c43b --- /dev/null +++ b/tests/test_server.py @@ -0,0 +1,128 @@ +import json +import re + +from twisted.internet.defer import Deferred +from twisted.test.proto_helpers import MemoryReactorClock + +from synapse.util import Clock +from synapse.api.errors import Codes, SynapseError +from synapse.http.server import JsonResource +from tests import unittest +from tests.server import make_request, setup_test_homeserver + + +class JsonResourceTests(unittest.TestCase): + def setUp(self): + self.reactor = MemoryReactorClock() + self.hs_clock = Clock(self.reactor) + self.homeserver = setup_test_homeserver( + http_client=None, clock=self.hs_clock, reactor=self.reactor + ) + + def test_handler_for_request(self): + """ + JsonResource.handler_for_request gives correctly decoded URL args to + the callback, while Twisted will give the raw bytes of URL query + arguments. + """ + got_kwargs = {} + + def _callback(request, **kwargs): + got_kwargs.update(kwargs) + return (200, kwargs) + + res = JsonResource(self.homeserver) + res.register_paths("GET", [re.compile("^/foo/(?P<room_id>[^/]*)$")], _callback) + + request, channel = make_request(b"GET", b"/foo/%E2%98%83?a=%E2%98%83") + request.render(res) + + self.assertEqual(request.args, {b'a': [u"\N{SNOWMAN}".encode('utf8')]}) + self.assertEqual(got_kwargs, {u"room_id": u"\N{SNOWMAN}"}) + + def test_callback_direct_exception(self): + """ + If the web callback raises an uncaught exception, it will be translated + into a 500. + """ + + def _callback(request, **kwargs): + raise Exception("boo") + + res = JsonResource(self.homeserver) + res.register_paths("GET", [re.compile("^/foo$")], _callback) + + request, channel = make_request(b"GET", b"/foo") + request.render(res) + + self.assertEqual(channel.result["code"], b'500') + + def test_callback_indirect_exception(self): + """ + If the web callback raises an uncaught exception in a Deferred, it will + be translated into a 500. + """ + + def _throw(*args): + raise Exception("boo") + + def _callback(request, **kwargs): + d = Deferred() + d.addCallback(_throw) + self.reactor.callLater(1, d.callback, True) + return d + + res = JsonResource(self.homeserver) + res.register_paths("GET", [re.compile("^/foo$")], _callback) + + request, channel = make_request(b"GET", b"/foo") + request.render(res) + + # No error has been raised yet + self.assertTrue("code" not in channel.result) + + # Advance time, now there's an error + self.reactor.advance(1) + self.assertEqual(channel.result["code"], b'500') + + def test_callback_synapseerror(self): + """ + If the web callback raises a SynapseError, it returns the appropriate + status code and message set in it. + """ + + def _callback(request, **kwargs): + raise SynapseError(403, "Forbidden!!one!", Codes.FORBIDDEN) + + res = JsonResource(self.homeserver) + res.register_paths("GET", [re.compile("^/foo$")], _callback) + + request, channel = make_request(b"GET", b"/foo") + request.render(res) + + self.assertEqual(channel.result["code"], b'403') + reply_body = json.loads(channel.result["body"]) + self.assertEqual(reply_body["error"], "Forbidden!!one!") + self.assertEqual(reply_body["errcode"], "M_FORBIDDEN") + + def test_no_handler(self): + """ + If there is no handler to process the request, Synapse will return 400. + """ + + def _callback(request, **kwargs): + """ + Not ever actually called! + """ + self.fail("shouldn't ever get here") + + res = JsonResource(self.homeserver) + res.register_paths("GET", [re.compile("^/foo$")], _callback) + + request, channel = make_request(b"GET", b"/foobar") + request.render(res) + + self.assertEqual(channel.result["code"], b'400') + reply_body = json.loads(channel.result["body"]) + self.assertEqual(reply_body["error"], "Unrecognized request") + self.assertEqual(reply_body["errcode"], "M_UNRECOGNIZED") diff --git a/tests/unittest.py b/tests/unittest.py index 184fe880f3..b25f2db5d5 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -35,7 +35,10 @@ class ToTwistedHandler(logging.Handler): def emit(self, record): log_entry = self.format(record) log_level = record.levelname.lower().replace('warning', 'warn') - self.tx_log.emit(twisted.logger.LogLevel.levelWithName(log_level), log_entry) + self.tx_log.emit( + twisted.logger.LogLevel.levelWithName(log_level), + log_entry.replace("{", r"(").replace("}", r")"), + ) handler = ToTwistedHandler() diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py index 2516fe40f4..a94d566c96 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py @@ -18,15 +18,20 @@ import logging import mock from synapse.api.errors import SynapseError -from synapse.util import async from synapse.util import logcontext -from twisted.internet import defer +from twisted.internet import defer, reactor from synapse.util.caches import descriptors from tests import unittest logger = logging.getLogger(__name__) +def run_on_reactor(): + d = defer.Deferred() + reactor.callLater(0, d.callback, 0) + return logcontext.make_deferred_yieldable(d) + + class CacheTestCase(unittest.TestCase): def test_invalidate_all(self): cache = descriptors.Cache("testcache") @@ -195,7 +200,8 @@ class DescriptorTestCase(unittest.TestCase): def fn(self, arg1): @defer.inlineCallbacks def inner_fn(): - yield async.run_on_reactor() + # we want this to behave like an asynchronous function + yield run_on_reactor() raise SynapseError(400, "blah") return inner_fn() @@ -205,7 +211,12 @@ class DescriptorTestCase(unittest.TestCase): with logcontext.LoggingContext() as c1: c1.name = "c1" try: - yield obj.fn(1) + d = obj.fn(1) + self.assertEqual( + logcontext.LoggingContext.current_context(), + logcontext.LoggingContext.sentinel, + ) + yield d self.fail("No exception thrown") except SynapseError: pass diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py index bc92f85fa6..543ac5bed9 100644 --- a/tests/util/test_dict_cache.py +++ b/tests/util/test_dict_cache.py @@ -32,7 +32,7 @@ class DictCacheTestCase(unittest.TestCase): seq = self.cache.sequence test_value = {"test": "test_simple_cache_hit_full"} - self.cache.update(seq, key, test_value, full=True) + self.cache.update(seq, key, test_value) c = self.cache.get(key) self.assertEqual(test_value, c.value) @@ -44,7 +44,7 @@ class DictCacheTestCase(unittest.TestCase): test_value = { "test": "test_simple_cache_hit_partial" } - self.cache.update(seq, key, test_value, full=True) + self.cache.update(seq, key, test_value) c = self.cache.get(key, ["test"]) self.assertEqual(test_value, c.value) @@ -56,7 +56,7 @@ class DictCacheTestCase(unittest.TestCase): test_value = { "test": "test_simple_cache_miss_partial" } - self.cache.update(seq, key, test_value, full=True) + self.cache.update(seq, key, test_value) c = self.cache.get(key, ["test2"]) self.assertEqual({}, c.value) @@ -70,7 +70,7 @@ class DictCacheTestCase(unittest.TestCase): "test2": "test_simple_cache_hit_miss_partial2", "test3": "test_simple_cache_hit_miss_partial3", } - self.cache.update(seq, key, test_value, full=True) + self.cache.update(seq, key, test_value) c = self.cache.get(key, ["test2"]) self.assertEqual({"test2": "test_simple_cache_hit_miss_partial2"}, c.value) @@ -82,13 +82,13 @@ class DictCacheTestCase(unittest.TestCase): test_value_1 = { "test": "test_simple_cache_hit_miss_partial", } - self.cache.update(seq, key, test_value_1, full=False) + self.cache.update(seq, key, test_value_1, fetched_keys=set("test")) seq = self.cache.sequence test_value_2 = { "test2": "test_simple_cache_hit_miss_partial2", } - self.cache.update(seq, key, test_value_2, full=False) + self.cache.update(seq, key, test_value_2, fetched_keys=set("test2")) c = self.cache.get(key) self.assertEqual( diff --git a/tests/util/test_file_consumer.py b/tests/util/test_file_consumer.py index d6e1082779..c2aae8f54c 100644 --- a/tests/util/test_file_consumer.py +++ b/tests/util/test_file_consumer.py @@ -30,7 +30,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_pull_consumer(self): string_file = StringIO() - consumer = BackgroundFileConsumer(string_file) + consumer = BackgroundFileConsumer(string_file, reactor=reactor) try: producer = DummyPullProducer() @@ -54,7 +54,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_push_consumer(self): string_file = BlockingStringWrite() - consumer = BackgroundFileConsumer(string_file) + consumer = BackgroundFileConsumer(string_file, reactor=reactor) try: producer = NonCallableMock(spec_set=[]) @@ -80,7 +80,7 @@ class FileConsumerTests(unittest.TestCase): @defer.inlineCallbacks def test_push_producer_feedback(self): string_file = BlockingStringWrite() - consumer = BackgroundFileConsumer(string_file) + consumer = BackgroundFileConsumer(string_file, reactor=reactor) try: producer = NonCallableMock(spec_set=["pauseProducing", "resumeProducing"]) diff --git a/tests/util/test_linearizer.py b/tests/util/test_linearizer.py index 4865eb4bc6..bf7e3aa885 100644 --- a/tests/util/test_linearizer.py +++ b/tests/util/test_linearizer.py @@ -12,10 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util import async, logcontext + +from synapse.util import logcontext, Clock from tests import unittest -from twisted.internet import defer +from twisted.internet import defer, reactor from synapse.util.async import Linearizer from six.moves import range @@ -53,7 +54,7 @@ class LinearizerTestCase(unittest.TestCase): self.assertEqual( logcontext.LoggingContext.current_context(), lc) if sleep: - yield async.sleep(0) + yield Clock(reactor).sleep(0) self.assertEqual( logcontext.LoggingContext.current_context(), lc) diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py index ad78d884e0..9cf90fcfc4 100644 --- a/tests/util/test_logcontext.py +++ b/tests/util/test_logcontext.py @@ -3,8 +3,7 @@ from twisted.internet import defer from twisted.internet import reactor from .. import unittest -from synapse.util.async import sleep -from synapse.util import logcontext +from synapse.util import logcontext, Clock from synapse.util.logcontext import LoggingContext @@ -22,18 +21,20 @@ class LoggingContextTestCase(unittest.TestCase): @defer.inlineCallbacks def test_sleep(self): + clock = Clock(reactor) + @defer.inlineCallbacks def competing_callback(): with LoggingContext() as competing_context: competing_context.request = "competing" - yield sleep(0) + yield clock.sleep(0) self._check_test_key("competing") reactor.callLater(0, competing_callback) with LoggingContext() as context_one: context_one.request = "one" - yield sleep(0) + yield clock.sleep(0) self._check_test_key("one") def _test_run_in_background(self, function): @@ -87,7 +88,7 @@ class LoggingContextTestCase(unittest.TestCase): def test_run_in_background_with_blocking_fn(self): @defer.inlineCallbacks def blocking_function(): - yield sleep(0) + yield Clock(reactor).sleep(0) return self._test_run_in_background(blocking_function) diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py new file mode 100644 index 0000000000..67ece166c7 --- /dev/null +++ b/tests/util/test_stream_change_cache.py @@ -0,0 +1,198 @@ +from tests import unittest +from mock import patch + +from synapse.util.caches.stream_change_cache import StreamChangeCache + + +class StreamChangeCacheTests(unittest.TestCase): + """ + Tests for StreamChangeCache. + """ + + def test_prefilled_cache(self): + """ + Providing a prefilled cache to StreamChangeCache will result in a cache + with the prefilled-cache entered in. + """ + cache = StreamChangeCache("#test", 1, prefilled_cache={"user@foo.com": 2}) + self.assertTrue(cache.has_entity_changed("user@foo.com", 1)) + + def test_has_entity_changed(self): + """ + StreamChangeCache.entity_has_changed will mark entities as changed, and + has_entity_changed will observe the changed entities. + """ + cache = StreamChangeCache("#test", 3) + + cache.entity_has_changed("user@foo.com", 6) + cache.entity_has_changed("bar@baz.net", 7) + + # If it's been changed after that stream position, return True + self.assertTrue(cache.has_entity_changed("user@foo.com", 4)) + self.assertTrue(cache.has_entity_changed("bar@baz.net", 4)) + + # If it's been changed at that stream position, return False + self.assertFalse(cache.has_entity_changed("user@foo.com", 6)) + + # If there's no changes after that stream position, return False + self.assertFalse(cache.has_entity_changed("user@foo.com", 7)) + + # If the entity does not exist, return False. + self.assertFalse(cache.has_entity_changed("not@here.website", 7)) + + # If we request before the stream cache's earliest known position, + # return True, whether it's a known entity or not. + self.assertTrue(cache.has_entity_changed("user@foo.com", 0)) + self.assertTrue(cache.has_entity_changed("not@here.website", 0)) + + @patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0) + def test_has_entity_changed_pops_off_start(self): + """ + StreamChangeCache.entity_has_changed will respect the max size and + purge the oldest items upon reaching that max size. + """ + cache = StreamChangeCache("#test", 1, max_size=2) + + cache.entity_has_changed("user@foo.com", 2) + cache.entity_has_changed("bar@baz.net", 3) + cache.entity_has_changed("user@elsewhere.org", 4) + + # The cache is at the max size, 2 + self.assertEqual(len(cache._cache), 2) + + # The oldest item has been popped off + self.assertTrue("user@foo.com" not in cache._entity_to_key) + + # If we update an existing entity, it keeps the two existing entities + cache.entity_has_changed("bar@baz.net", 5) + self.assertEqual( + set(["bar@baz.net", "user@elsewhere.org"]), set(cache._entity_to_key) + ) + + def test_get_all_entities_changed(self): + """ + StreamChangeCache.get_all_entities_changed will return all changed + entities since the given position. If the position is before the start + of the known stream, it returns None instead. + """ + cache = StreamChangeCache("#test", 1) + + cache.entity_has_changed("user@foo.com", 2) + cache.entity_has_changed("bar@baz.net", 3) + cache.entity_has_changed("user@elsewhere.org", 4) + + self.assertEqual( + cache.get_all_entities_changed(1), + ["user@foo.com", "bar@baz.net", "user@elsewhere.org"], + ) + self.assertEqual( + cache.get_all_entities_changed(2), ["bar@baz.net", "user@elsewhere.org"] + ) + self.assertEqual(cache.get_all_entities_changed(3), ["user@elsewhere.org"]) + self.assertEqual(cache.get_all_entities_changed(0), None) + + def test_has_any_entity_changed(self): + """ + StreamChangeCache.has_any_entity_changed will return True if any + entities have been changed since the provided stream position, and + False if they have not. If the cache has entries and the provided + stream position is before it, it will return True, otherwise False if + the cache has no entries. + """ + cache = StreamChangeCache("#test", 1) + + # With no entities, it returns False for the past, present, and future. + self.assertFalse(cache.has_any_entity_changed(0)) + self.assertFalse(cache.has_any_entity_changed(1)) + self.assertFalse(cache.has_any_entity_changed(2)) + + # We add an entity + cache.entity_has_changed("user@foo.com", 2) + + # With an entity, it returns True for the past, the stream start + # position, and False for the stream position the entity was changed + # on and ones after it. + self.assertTrue(cache.has_any_entity_changed(0)) + self.assertTrue(cache.has_any_entity_changed(1)) + self.assertFalse(cache.has_any_entity_changed(2)) + self.assertFalse(cache.has_any_entity_changed(3)) + + def test_get_entities_changed(self): + """ + StreamChangeCache.get_entities_changed will return the entities in the + given list that have changed since the provided stream ID. If the + stream position is earlier than the earliest known position, it will + return all of the entities queried for. + """ + cache = StreamChangeCache("#test", 1) + + cache.entity_has_changed("user@foo.com", 2) + cache.entity_has_changed("bar@baz.net", 3) + cache.entity_has_changed("user@elsewhere.org", 4) + + # Query all the entries, but mid-way through the stream. We should only + # get the ones after that point. + self.assertEqual( + cache.get_entities_changed( + ["user@foo.com", "bar@baz.net", "user@elsewhere.org"], stream_pos=2 + ), + set(["bar@baz.net", "user@elsewhere.org"]), + ) + + # Query all the entries mid-way through the stream, but include one + # that doesn't exist in it. We should get back the one that doesn't + # exist, too. + self.assertEqual( + cache.get_entities_changed( + [ + "user@foo.com", + "bar@baz.net", + "user@elsewhere.org", + "not@here.website", + ], + stream_pos=2, + ), + set(["bar@baz.net", "user@elsewhere.org", "not@here.website"]), + ) + + # Query all the entries, but before the first known point. We will get + # all the entries we queried for, including ones that don't exist. + self.assertEqual( + cache.get_entities_changed( + [ + "user@foo.com", + "bar@baz.net", + "user@elsewhere.org", + "not@here.website", + ], + stream_pos=0, + ), + set( + [ + "user@foo.com", + "bar@baz.net", + "user@elsewhere.org", + "not@here.website", + ] + ), + ) + + def test_max_pos(self): + """ + StreamChangeCache.get_max_pos_of_last_change will return the most + recent point where the entity could have changed. If the entity is not + known, the stream start is provided instead. + """ + cache = StreamChangeCache("#test", 1) + + cache.entity_has_changed("user@foo.com", 2) + cache.entity_has_changed("bar@baz.net", 3) + cache.entity_has_changed("user@elsewhere.org", 4) + + # Known entities will return the point where they were changed. + self.assertEqual(cache.get_max_pos_of_last_change("user@foo.com"), 2) + self.assertEqual(cache.get_max_pos_of_last_change("bar@baz.net"), 3) + self.assertEqual(cache.get_max_pos_of_last_change("user@elsewhere.org"), 4) + + # Unknown entities will return the stream start position. + self.assertEqual(cache.get_max_pos_of_last_change("not@here.website"), 1) diff --git a/tests/utils.py b/tests/utils.py index 4e6e45c79b..9cc44a80aa 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -37,11 +37,15 @@ USE_POSTGRES_FOR_TESTS = False @defer.inlineCallbacks -def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): +def setup_test_homeserver(name="test", datastore=None, config=None, reactor=None, + **kargs): """Setup a homeserver suitable for running tests against. Keyword arguments are passed to the Homeserver constructor. If no datastore is supplied a datastore backed by an in-memory sqlite db will be given to the HS. """ + if reactor is None: + from twisted.internet import reactor + if config is None: config = Mock() config.signing_key = [MockKey()] @@ -111,6 +115,7 @@ def setup_test_homeserver(name="test", datastore=None, config=None, **kargs): database_engine=db_engine, room_list_handler=object(), tls_server_context_factory=Mock(), + reactor=reactor, **kargs ) db_conn = hs.get_db_conn() diff --git a/tox.ini b/tox.ini index 99b35f399a..61a20a10cb 100644 --- a/tox.ini +++ b/tox.ini @@ -52,33 +52,41 @@ commands = /usr/bin/find "{toxinidir}" -name '*.pyc' -delete coverage run {env:COVERAGE_OPTS:} --source="{toxinidir}/synapse" \ "{envbindir}/trial" {env:TRIAL_FLAGS:} {posargs:tests/config \ - tests/appservice/test_scheduler.py \ + tests/api/test_filtering.py \ + tests/api/test_ratelimiting.py \ + tests/appservice \ + tests/crypto \ + tests/events \ + tests/handlers/test_appservice.py \ tests/handlers/test_auth.py \ + tests/handlers/test_device.py \ + tests/handlers/test_directory.py \ + tests/handlers/test_e2e_keys.py \ tests/handlers/test_presence.py \ + tests/handlers/test_profile.py \ tests/handlers/test_register.py \ + tests/replication/slave/storage/test_account_data.py \ + tests/replication/slave/storage/test_receipts.py \ tests/storage/test_appservice.py \ + tests/storage/test_background_update.py \ tests/storage/test_base.py \ + tests/storage/test__base.py \ tests/storage/test_client_ips.py \ tests/storage/test_devices.py \ tests/storage/test_end_to_end_keys.py \ tests/storage/test_event_push_actions.py \ + tests/storage/test_keys.py \ + tests/storage/test_presence.py \ tests/storage/test_profile.py \ + tests/storage/test_registration.py \ tests/storage/test_room.py \ + tests/storage/test_user_directory.py \ tests/test_distributor.py \ tests/test_dns.py \ tests/test_preview.py \ tests/test_test_utils.py \ tests/test_types.py \ - tests/util/test_dict_cache.py \ - tests/util/test_expiring_cache.py \ - tests/util/test_file_consumer.py \ - tests/util/test_limiter.py \ - tests/util/test_linearizer.py \ - tests/util/test_logcontext.py \ - tests/util/test_logformatter.py \ - tests/util/test_rwlock.py \ - tests/util/test_snapshot_cache.py \ - tests/util/test_wheel_timer.py} \ + tests/util} \ {env:TOXSUFFIX:} {env:DUMP_COVERAGE_COMMAND:coverage report -m} @@ -94,3 +102,11 @@ basepython = python2.7 deps = flake8 commands = /bin/sh -c "flake8 synapse tests {env:PEP8SUFFIX:}" + + +[testenv:check-newsfragment] +skip_install = True +deps = towncrier>=18.6.0rc1 +commands = + python -m towncrier.check --compare-with=origin/develop +basepython = python3.6 \ No newline at end of file