Add ability to shard the federation sender (#7798)

2024-11-22 01:25:44 +03:00 · 2020-07-10 18:26:36 +01:00 · 2020-07-10 18:26:36 +01:00 · f299441cc6
commit f299441cc6
parent f1245dc3c0
15 changed files with 670 additions and 157 deletions
--- a/changelog.d/7798.feature
+++ b/changelog.d/7798.feature
@ -0,0 +1 @@
+Add experimental support for running multiple federation sender processes.
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@ -118,38 +118,6 @@ pid_file: DATADIR/homeserver.pid
 #
 #enable_search: false

-# Restrict federation to the following whitelist of domains.
-# N.B. we recommend also firewalling your federation listener to limit
-# inbound federation traffic as early as possible, rather than relying
-# purely on this application-layer restriction.  If not specified, the
-# default is to whitelist everything.
-#
-#federation_domain_whitelist:
-#  - lon.example.com
-#  - nyc.example.com
-#  - syd.example.com
-
-# Prevent federation requests from being sent to the following
-# blacklist IP address CIDR ranges. If this option is not specified, or
-# specified with an empty list, no ip range blacklist will be enforced.
-#
-# As of Synapse v1.4.0 this option also affects any outbound requests to identity
-# servers provided by user input.
-#
-# (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
-# listed here, since they correspond to unroutable addresses.)
-#
-federation_ip_range_blacklist:
-  - '127.0.0.0/8'
-  - '10.0.0.0/8'
-  - '172.16.0.0/12'
-  - '192.168.0.0/16'
-  - '100.64.0.0/10'
-  - '169.254.0.0/16'
-  - '::1/128'
-  - 'fe80::/64'
-  - 'fc00::/7'
-
 # List of ports that Synapse should listen on, their purpose and their
 # configuration.
 #
@ -608,6 +576,39 @@ acme:



+# Restrict federation to the following whitelist of domains.
+# N.B. we recommend also firewalling your federation listener to limit
+# inbound federation traffic as early as possible, rather than relying
+# purely on this application-layer restriction.  If not specified, the
+# default is to whitelist everything.
+#
+#federation_domain_whitelist:
+#  - lon.example.com
+#  - nyc.example.com
+#  - syd.example.com
+
+# Prevent federation requests from being sent to the following
+# blacklist IP address CIDR ranges. If this option is not specified, or
+# specified with an empty list, no ip range blacklist will be enforced.
+#
+# As of Synapse v1.4.0 this option also affects any outbound requests to identity
+# servers provided by user input.
+#
+# (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
+# listed here, since they correspond to unroutable addresses.)
+#
+federation_ip_range_blacklist:
+  - '127.0.0.0/8'
+  - '10.0.0.0/8'
+  - '172.16.0.0/12'
+  - '192.168.0.0/16'
+  - '100.64.0.0/10'
+  - '169.254.0.0/16'
+  - '::1/128'
+  - 'fe80::/64'
+  - 'fc00::/7'
+
+
 ## Caching ##

 # Caching can be configured through the following options.
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@ -511,25 +511,7 @@ class GenericWorkerSlavedStore(
    SearchWorkerStore,
    BaseSlavedStore,
 ):
-    def __init__(self, database, db_conn, hs):
-        super(GenericWorkerSlavedStore, self).__init__(database, db_conn, hs)
-
-        # We pull out the current federation stream position now so that we
-        # always have a known value for the federation position in memory so
-        # that we don't have to bounce via a deferred once when we start the
-        # replication streams.
-        self.federation_out_pos_startup = self._get_federation_out_pos(db_conn)
-
-    def _get_federation_out_pos(self, db_conn):
-        sql = "SELECT stream_id FROM federation_stream_position WHERE type = ?"
-        sql = self.database_engine.convert_param_style(sql)
-
-        txn = db_conn.cursor()
-        txn.execute(sql, ("federation",))
-        rows = txn.fetchall()
-        txn.close()
-
-        return rows[0][0] if rows else -1
+    pass


 class GenericWorkerServer(HomeServer):
@ -812,19 +794,11 @@ class FederationSenderHandler(object):
        self.federation_sender = hs.get_federation_sender()
        self._hs = hs

-        # if the worker is restarted, we want to pick up where we left off in
-        # the replication stream, so load the position from the database.
-        #
-        # XXX is this actually worthwhile? Whenever the master is restarted, we'll
-        # drop some rows anyway (which is mostly fine because we're only dropping
-        # typing and presence notifications). If the replication stream is
-        # unreliable, why do we do all this hoop-jumping to store the position in the
-        # database? See also https://github.com/matrix-org/synapse/issues/7535.
-        #
-        self.federation_position = self.store.federation_out_pos_startup
+        # Stores the latest position in the federation stream we've gotten up
+        # to. This is always set before we use it.
+        self.federation_position = None

        self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")
-        self._last_ack = self.federation_position

    def on_start(self):
        # There may be some events that are persisted but haven't been sent,
@ -932,7 +906,6 @@ class FederationSenderHandler(object):
                # We ACK this token over replication so that the master can drop
                # its in memory queues
                self._hs.get_tcp_replication().send_federation_ack(current_position)
-                self._last_ack = current_position
        except Exception:
            logger.exception("Error updating federation stream position")

@ -960,7 +933,7 @@ def start(config_options):
    )

    if config.worker_app == "synapse.app.appservice":
-        if config.notify_appservices:
+        if config.appservice.notify_appservices:
            sys.stderr.write(
                "\nThe appservices must be disabled in the main synapse process"
                "\nbefore they can be run in a separate worker."
@ -970,13 +943,13 @@ def start(config_options):
            sys.exit(1)

        # Force the appservice to start since they will be disabled in the main config
-        config.notify_appservices = True
+        config.appservice.notify_appservices = True
    else:
        # For other worker types we force this to off.
-        config.notify_appservices = False
+        config.appservice.notify_appservices = False

    if config.worker_app == "synapse.app.pusher":
-        if config.start_pushers:
+        if config.server.start_pushers:
            sys.stderr.write(
                "\nThe pushers must be disabled in the main synapse process"
                "\nbefore they can be run in a separate worker."
@ -986,13 +959,13 @@ def start(config_options):
            sys.exit(1)

        # Force the pushers to start since they will be disabled in the main config
-        config.start_pushers = True
+        config.server.start_pushers = True
    else:
        # For other worker types we force this to off.
-        config.start_pushers = False
+        config.server.start_pushers = False

    if config.worker_app == "synapse.app.user_dir":
-        if config.update_user_directory:
+        if config.server.update_user_directory:
            sys.stderr.write(
                "\nThe update_user_directory must be disabled in the main synapse process"
                "\nbefore they can be run in a separate worker."
@ -1002,13 +975,13 @@ def start(config_options):
            sys.exit(1)

        # Force the pushers to start since they will be disabled in the main config
-        config.update_user_directory = True
+        config.server.update_user_directory = True
    else:
        # For other worker types we force this to off.
-        config.update_user_directory = False
+        config.server.update_user_directory = False

    if config.worker_app == "synapse.app.federation_sender":
-        if config.send_federation:
+        if config.federation.send_federation:
            sys.stderr.write(
                "\nThe send_federation must be disabled in the main synapse process"
                "\nbefore they can be run in a separate worker."
@ -1018,10 +991,10 @@ def start(config_options):
            sys.exit(1)

        # Force the pushers to start since they will be disabled in the main config
-        config.send_federation = True
+        config.federation.send_federation = True
    else:
        # For other worker types we force this to off.
-        config.send_federation = False
+        config.federation.send_federation = False

    synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts

--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from hashlib import sha256
+from typing import List, Optional
+
+import attr
+from netaddr import IPSet
+
+from ._base import Config, ConfigError
+
+
+@attr.s
+class ShardedFederationSendingConfig:
+    """Algorithm for choosing which federation sender instance is responsible
+    for which destionation host.
+    """
+
+    instances = attr.ib(type=List[str])
+
+    def should_send_to(self, instance_name: str, destination: str) -> bool:
+        """Whether this instance is responsible for sending transcations for
+        the given host.
+        """
+
+        # If multiple federation senders are not defined we always return true.
+        if not self.instances or len(self.instances) == 1:
+            return True
+
+        # We shard by taking the hash, modulo it by the number of federation
+        # senders and then checking whether this instance matches the instance
+        # at that index.
+        #
+        # (Technically this introduces some bias and is not entirely uniform, but
+        # since the hash is so large the bias is ridiculously small).
+        dest_hash = sha256(destination.encode("utf8")).digest()
+        dest_int = int.from_bytes(dest_hash, byteorder="little")
+        remainder = dest_int % (len(self.instances))
+        return self.instances[remainder] == instance_name
+
+
+class FederationConfig(Config):
+    section = "federation"
+
+    def read_config(self, config, **kwargs):
+        # Whether to send federation traffic out in this process. This only
+        # applies to some federation traffic, and so shouldn't be used to
+        # "disable" federation
+        self.send_federation = config.get("send_federation", True)
+
+        federation_sender_instances = config.get("federation_sender_instances") or []
+        self.federation_shard_config = ShardedFederationSendingConfig(
+            federation_sender_instances
+        )
+
+        # FIXME: federation_domain_whitelist needs sytests
+        self.federation_domain_whitelist = None  # type: Optional[dict]
+        federation_domain_whitelist = config.get("federation_domain_whitelist", None)
+
+        if federation_domain_whitelist is not None:
+            # turn the whitelist into a hash for speed of lookup
+            self.federation_domain_whitelist = {}
+
+            for domain in federation_domain_whitelist:
+                self.federation_domain_whitelist[domain] = True
+
+        self.federation_ip_range_blacklist = config.get(
+            "federation_ip_range_blacklist", []
+        )
+
+        # Attempt to create an IPSet from the given ranges
+        try:
+            self.federation_ip_range_blacklist = IPSet(
+                self.federation_ip_range_blacklist
+            )
+
+            # Always blacklist 0.0.0.0, ::
+            self.federation_ip_range_blacklist.update(["0.0.0.0", "::"])
+        except Exception as e:
+            raise ConfigError(
+                "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e
+            )
+
+    def generate_config_section(self, config_dir_path, server_name, **kwargs):
+        return """\
+        # Restrict federation to the following whitelist of domains.
+        # N.B. we recommend also firewalling your federation listener to limit
+        # inbound federation traffic as early as possible, rather than relying
+        # purely on this application-layer restriction.  If not specified, the
+        # default is to whitelist everything.
+        #
+        #federation_domain_whitelist:
+        #  - lon.example.com
+        #  - nyc.example.com
+        #  - syd.example.com
+
+        # Prevent federation requests from being sent to the following
+        # blacklist IP address CIDR ranges. If this option is not specified, or
+        # specified with an empty list, no ip range blacklist will be enforced.
+        #
+        # As of Synapse v1.4.0 this option also affects any outbound requests to identity
+        # servers provided by user input.
+        #
+        # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
+        # listed here, since they correspond to unroutable addresses.)
+        #
+        federation_ip_range_blacklist:
+          - '127.0.0.0/8'
+          - '10.0.0.0/8'
+          - '172.16.0.0/12'
+          - '192.168.0.0/16'
+          - '100.64.0.0/10'
+          - '169.254.0.0/16'
+          - '::1/128'
+          - 'fe80::/64'
+          - 'fc00::/7'
+        """
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@ -23,6 +23,7 @@ from .cas import CasConfig
 from .consent_config import ConsentConfig
 from .database import DatabaseConfig
 from .emailconfig import EmailConfig
+from .federation import FederationConfig
 from .groups import GroupsConfig
 from .jwt_config import JWTConfig
 from .key import KeyConfig
@ -57,6 +58,7 @@ class HomeServerConfig(RootConfig):
    config_classes = [
        ServerConfig,
        TlsConfig,
+        FederationConfig,
        CacheConfig,
        DatabaseConfig,
        LoggingConfig,
@ -90,4 +92,5 @@ class HomeServerConfig(RootConfig):
        ThirdPartyRulesConfig,
        TracerConfig,
        RedisConfig,
+        FederationConfig,
    ]
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@ -23,7 +23,6 @@ from typing import Any, Dict, Iterable, List, Optional

 import attr
 import yaml
-from netaddr import IPSet

 from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
 from synapse.http.endpoint import parse_and_validate_server_name
@ -136,11 +135,6 @@ class ServerConfig(Config):
        self.use_frozen_dicts = config.get("use_frozen_dicts", False)
        self.public_baseurl = config.get("public_baseurl")

-        # Whether to send federation traffic out in this process. This only
-        # applies to some federation traffic, and so shouldn't be used to
-        # "disable" federation
-        self.send_federation = config.get("send_federation", True)
-
        # Whether to enable user presence.
        self.use_presence = config.get("use_presence", True)

@ -263,34 +257,6 @@ class ServerConfig(Config):
        # due to resource constraints
        self.admin_contact = config.get("admin_contact", None)

-        # FIXME: federation_domain_whitelist needs sytests
-        self.federation_domain_whitelist = None  # type: Optional[dict]
-        federation_domain_whitelist = config.get("federation_domain_whitelist", None)
-
-        if federation_domain_whitelist is not None:
-            # turn the whitelist into a hash for speed of lookup
-            self.federation_domain_whitelist = {}
-
-            for domain in federation_domain_whitelist:
-                self.federation_domain_whitelist[domain] = True
-
-        self.federation_ip_range_blacklist = config.get(
-            "federation_ip_range_blacklist", []
-        )
-
-        # Attempt to create an IPSet from the given ranges
-        try:
-            self.federation_ip_range_blacklist = IPSet(
-                self.federation_ip_range_blacklist
-            )
-
-            # Always blacklist 0.0.0.0, ::
-            self.federation_ip_range_blacklist.update(["0.0.0.0", "::"])
-        except Exception as e:
-            raise ConfigError(
-                "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e
-            )
-
        if self.public_baseurl is not None:
            if self.public_baseurl[-1] != "/":
                self.public_baseurl += "/"
@ -743,38 +709,6 @@ class ServerConfig(Config):
        #
        #enable_search: false

-        # Restrict federation to the following whitelist of domains.
-        # N.B. we recommend also firewalling your federation listener to limit
-        # inbound federation traffic as early as possible, rather than relying
-        # purely on this application-layer restriction.  If not specified, the
-        # default is to whitelist everything.
-        #
-        #federation_domain_whitelist:
-        #  - lon.example.com
-        #  - nyc.example.com
-        #  - syd.example.com
-
-        # Prevent federation requests from being sent to the following
-        # blacklist IP address CIDR ranges. If this option is not specified, or
-        # specified with an empty list, no ip range blacklist will be enforced.
-        #
-        # As of Synapse v1.4.0 this option also affects any outbound requests to identity
-        # servers provided by user input.
-        #
-        # (0.0.0.0 and :: are always blacklisted, whether or not they are explicitly
-        # listed here, since they correspond to unroutable addresses.)
-        #
-        federation_ip_range_blacklist:
-          - '127.0.0.0/8'
-          - '10.0.0.0/8'
-          - '172.16.0.0/12'
-          - '192.168.0.0/16'
-          - '100.64.0.0/10'
-          - '169.254.0.0/16'
-          - '::1/128'
-          - 'fe80::/64'
-          - 'fc00::/7'
-
        # List of ports that Synapse should listen on, their purpose and their
        # configuration.
        #
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@ -55,6 +55,11 @@ class FederationRemoteSendQueue(object):
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id

+        # We may have multiple federation sender instances, so we need to track
+        # their positions separately.
+        self._sender_instances = hs.config.federation.federation_shard_config.instances
+        self._sender_positions = {}
+
        # Pending presence map user_id -> UserPresenceState
        self.presence_map = {}  # type: Dict[str, UserPresenceState]

@ -261,7 +266,14 @@ class FederationRemoteSendQueue(object):
    def get_current_token(self):
        return self.pos - 1

-    def federation_ack(self, token):
+    def federation_ack(self, instance_name, token):
+        if self._sender_instances:
+            # If we have configured multiple federation sender instances we need
+            # to track their positions separately, and only clear the queue up
+            # to the token all instances have acked.
+            self._sender_positions[instance_name] = token
+            token = min(self._sender_positions.values())
+
        self._clear_queue_before_pos(token)

    async def get_replication_rows(
--- a/synapse/federation/sender/init.py
+++ b/synapse/federation/sender/init.py
@ -69,6 +69,9 @@ class FederationSender(object):

        self._transaction_manager = TransactionManager(hs)

+        self._instance_name = hs.get_instance_name()
+        self._federation_shard_config = hs.config.federation.federation_shard_config
+
        # map from destination to PerDestinationQueue
        self._per_destination_queues = {}  # type: Dict[str, PerDestinationQueue]

@ -191,7 +194,13 @@ class FederationSender(object):
                        )
                        return

-                    destinations = set(destinations)
+                    destinations = {
+                        d
+                        for d in destinations
+                        if self._federation_shard_config.should_send_to(
+                            self._instance_name, d
+                        )
+                    }

                    if send_on_behalf_of is not None:
                        # If we are sending the event on behalf of another server
@ -322,7 +331,12 @@ class FederationSender(object):

        # Work out which remote servers should be poked and poke them.
        domains = yield self.state.get_current_hosts_in_room(room_id)
-        domains = [d for d in domains if d != self.server_name]
+        domains = [
+            d
+            for d in domains
+            if d != self.server_name
+            and self._federation_shard_config.should_send_to(self._instance_name, d)
+        ]
        if not domains:
            return

@ -427,6 +441,10 @@ class FederationSender(object):
        for destination in destinations:
            if destination == self.server_name:
                continue
+            if not self._federation_shard_config.should_send_to(
+                self._instance_name, destination
+            ):
+                continue
            self._get_per_destination_queue(destination).send_presence(states)

    @measure_func("txnqueue._process_presence")
@ -441,6 +459,12 @@ class FederationSender(object):
            for destination in destinations:
                if destination == self.server_name:
                    continue
+
+                if not self._federation_shard_config.should_send_to(
+                    self._instance_name, destination
+                ):
+                    continue
+
                self._get_per_destination_queue(destination).send_presence(states)

    def build_and_send_edu(
@ -462,6 +486,11 @@ class FederationSender(object):
            logger.info("Not sending EDU to ourselves")
            return

+        if not self._federation_shard_config.should_send_to(
+            self._instance_name, destination
+        ):
+            return
+
        edu = Edu(
            origin=self.server_name,
            destination=destination,
@ -478,6 +507,11 @@ class FederationSender(object):
            edu: edu to send
            key: clobbering key for this edu
        """
+        if not self._federation_shard_config.should_send_to(
+            self._instance_name, edu.destination
+        ):
+            return
+
        queue = self._get_per_destination_queue(edu.destination)
        if key:
            queue.send_keyed_edu(edu, key)
@ -489,6 +523,11 @@ class FederationSender(object):
            logger.warning("Not sending device update to ourselves")
            return

+        if not self._federation_shard_config.should_send_to(
+            self._instance_name, destination
+        ):
+            return
+
        self._get_per_destination_queue(destination).attempt_new_transaction()

    def wake_destination(self, destination: str):
@ -502,6 +541,11 @@ class FederationSender(object):
            logger.warning("Not waking up ourselves")
            return

+        if not self._federation_shard_config.should_send_to(
+            self._instance_name, destination
+        ):
+            return
+
        self._get_per_destination_queue(destination).attempt_new_transaction()

    @staticmethod
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@ -74,6 +74,20 @@ class PerDestinationQueue(object):
        self._clock = hs.get_clock()
        self._store = hs.get_datastore()
        self._transaction_manager = transaction_manager
+        self._instance_name = hs.get_instance_name()
+        self._federation_shard_config = hs.config.federation.federation_shard_config
+
+        self._should_send_on_this_instance = True
+        if not self._federation_shard_config.should_send_to(
+            self._instance_name, destination
+        ):
+            # We don't raise an exception here to avoid taking out any other
+            # processing. We have a guard in `attempt_new_transaction` that
+            # ensure we don't start sending stuff.
+            logger.error(
+                "Create a per destination queue for %s on wrong worker", destination,
+            )
+            self._should_send_on_this_instance = False

        self._destination = destination
        self.transmission_loop_running = False
@ -180,6 +194,14 @@ class PerDestinationQueue(object):
            logger.debug("TX [%s] Transaction already in progress", self._destination)
            return

+        if not self._should_send_on_this_instance:
+            # We don't raise an exception here to avoid taking out any other
+            # processing.
+            logger.error(
+                "Trying to start a transaction to %s on wrong worker", self._destination
+            )
+            return
+
        logger.debug("TX [%s] Starting transaction loop", self._destination)

        run_as_background_process(
--- a/synapse/replication/tcp/commands.py
+++ b/synapse/replication/tcp/commands.py
@ -293,20 +293,22 @@ class FederationAckCommand(Command):

    Format::

-        FEDERATION_ACK <token>
+        FEDERATION_ACK <instance_name> <token>
    """

    NAME = "FEDERATION_ACK"

-    def __init__(self, token):
+    def __init__(self, instance_name, token):
+        self.instance_name = instance_name
        self.token = token

    @classmethod
    def from_line(cls, line):
-        return cls(int(line))
+        instance_name, token = line.split(" ")
+        return cls(instance_name, int(token))

    def to_line(self):
-        return str(self.token)
+        return "%s %s" % (self.instance_name, self.token)


 class RemovePusherCommand(Command):
--- a/synapse/replication/tcp/handler.py
+++ b/synapse/replication/tcp/handler.py
@ -238,7 +238,7 @@ class ReplicationCommandHandler:
        federation_ack_counter.inc()

        if self._federation_sender:
-            self._federation_sender.federation_ack(cmd.token)
+            self._federation_sender.federation_ack(cmd.instance_name, cmd.token)

    async def on_REMOVE_PUSHER(
        self, conn: AbstractConnection, cmd: RemovePusherCommand
@ -527,7 +527,7 @@ class ReplicationCommandHandler:
        """Ack data for the federation stream. This allows the master to drop
        data stored purely in memory.
        """
-        self.send_command(FederationAckCommand(token))
+        self.send_command(FederationAckCommand(self._instance_name, token))

    def send_user_sync(
        self, instance_id: str, user_id: str, is_syncing: bool, last_sync_ms: int
--- a/synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql
+++ b/synapse/storage/data_stores/main/schema/delta/58/10federation_pos_instance_name.sql
@ -0,0 +1,22 @@
+/* Copyright 2020 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- We need to store the stream positions by instance in a sharded config world.
+--
+-- We default to master as we want the column to be NOT NULL and we correctly
+-- reset the instance name to match the config each time we start up.
+ALTER TABLE federation_stream_position ADD COLUMN instance_name TEXT NOT NULL DEFAULT 'master';
+
+CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name);
--- a/synapse/storage/data_stores/main/stream.py
+++ b/synapse/storage/data_stores/main/stream.py
@ -45,7 +45,7 @@ from twisted.internet import defer
 from synapse.logging.context import make_deferred_yieldable, run_in_background
 from synapse.storage._base import SQLBaseStore
 from synapse.storage.data_stores.main.events_worker import EventsWorkerStore
-from synapse.storage.database import Database
+from synapse.storage.database import Database, make_in_list_sql_clause
 from synapse.storage.engines import PostgresEngine
 from synapse.types import RoomStreamToken
 from synapse.util.caches.stream_change_cache import StreamChangeCache
@ -253,6 +253,16 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
    def __init__(self, database: Database, db_conn, hs):
        super(StreamWorkerStore, self).__init__(database, db_conn, hs)

+        self._instance_name = hs.get_instance_name()
+        self._send_federation = hs.should_send_federation()
+        self._federation_shard_config = hs.config.federation.federation_shard_config
+
+        # If we're a process that sends federation we may need to reset the
+        # `federation_stream_position` table to match the current sharding
+        # config. We don't do this now as otherwise two processes could conflict
+        # during startup which would cause one to die.
+        self._need_to_reset_federation_stream_positions = self._send_federation
+
        events_max = self.get_room_max_stream_ordering()
        event_cache_prefill, min_event_val = self.db.get_cache_dict(
            db_conn,
@ -793,22 +803,95 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):

        return upper_bound, events

-    def get_federation_out_pos(self, typ):
-        return self.db.simple_select_one_onecol(
+    async def get_federation_out_pos(self, typ: str) -> int:
+        if self._need_to_reset_federation_stream_positions:
+            await self.db.runInteraction(
+                "_reset_federation_positions_txn", self._reset_federation_positions_txn
+            )
+            self._need_to_reset_federation_stream_positions = False
+
+        return await self.db.simple_select_one_onecol(
            table="federation_stream_position",
            retcol="stream_id",
-            keyvalues={"type": typ},
+            keyvalues={"type": typ, "instance_name": self._instance_name},
            desc="get_federation_out_pos",
        )

-    def update_federation_out_pos(self, typ, stream_id):
-        return self.db.simple_update_one(
+    async def update_federation_out_pos(self, typ, stream_id):
+        if self._need_to_reset_federation_stream_positions:
+            await self.db.runInteraction(
+                "_reset_federation_positions_txn", self._reset_federation_positions_txn
+            )
+            self._need_to_reset_federation_stream_positions = False
+
+        return await self.db.simple_update_one(
            table="federation_stream_position",
-            keyvalues={"type": typ},
+            keyvalues={"type": typ, "instance_name": self._instance_name},
            updatevalues={"stream_id": stream_id},
            desc="update_federation_out_pos",
        )

+    def _reset_federation_positions_txn(self, txn):
+        """Fiddles with the `federation_stream_position` table to make it match
+        the configured federation sender instances during start up.
+        """
+
+        # The federation sender instances may have changed, so we need to
+        # massage the `federation_stream_position` table to have a row per type
+        # per instance sending federation. If there is a mismatch we update the
+        # table with the correct rows using the *minimum* stream ID seen. This
+        # may result in resending of events/EDUs to remote servers, but that is
+        # preferable to dropping them.
+
+        if not self._send_federation:
+            return
+
+        # Pull out the configured instances. If we don't have a shard config then
+        # we assume that we're the only instance sending.
+        configured_instances = self._federation_shard_config.instances
+        if not configured_instances:
+            configured_instances = [self._instance_name]
+        elif self._instance_name not in configured_instances:
+            return
+
+        instances_in_table = self.db.simple_select_onecol_txn(
+            txn,
+            table="federation_stream_position",
+            keyvalues={},
+            retcol="instance_name",
+        )
+
+        if set(instances_in_table) == set(configured_instances):
+            # Nothing to do
+            return
+
+        sql = """
+            SELECT type, MIN(stream_id) FROM federation_stream_position
+            GROUP BY type
+        """
+        txn.execute(sql)
+        min_positions = dict(txn)  # Map from type -> min position
+
+        # Ensure we do actually have some values here
+        assert set(min_positions) == {"federation", "events"}
+
+        sql = """
+            DELETE FROM federation_stream_position
+            WHERE NOT (%s)
+        """
+        clause, args = make_in_list_sql_clause(
+            txn.database_engine, "instance_name", configured_instances
+        )
+        txn.execute(sql % (clause,), args)
+
+        for typ, stream_id in min_positions.items():
+            self.db.simple_upsert_txn(
+                txn,
+                table="federation_stream_position",
+                keyvalues={"type": typ, "instance_name": self._instance_name},
+                values={"stream_id": stream_id},
+            )
+
    def has_room_changed_since(self, room_id, stream_id):
        return self._events_stream_cache.has_entity_changed(room_id, stream_id)

--- a/tests/replication/test_federation_ack.py
+++ b/tests/replication/test_federation_ack.py
@ -32,6 +32,7 @@ class FederationAckTestCase(HomeserverTestCase):

    def make_homeserver(self, reactor, clock):
        hs = self.setup_test_homeserver(homeserverToUse=GenericWorkerServer)
+
        return hs

    def test_federation_ack_sent(self):
--- a/tests/replication/test_federation_sender_shard.py
+++ b/tests/replication/test_federation_sender_shard.py
@ -0,0 +1,286 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+
+from mock import Mock
+
+from twisted.internet import defer
+
+from synapse.api.constants import EventTypes, Membership
+from synapse.app.generic_worker import GenericWorkerServer
+from synapse.events.builder import EventBuilderFactory
+from synapse.replication.http import streams
+from synapse.replication.tcp.handler import ReplicationCommandHandler
+from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
+from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory
+from synapse.rest.admin import register_servlets_for_client_rest_resource
+from synapse.rest.client.v1 import login, room
+from synapse.types import UserID
+
+from tests import unittest
+from tests.server import FakeTransport
+
+logger = logging.getLogger(__name__)
+
+
+class BaseStreamTestCase(unittest.HomeserverTestCase):
+    """Base class for tests of the replication streams"""
+
+    servlets = [
+        streams.register_servlets,
+    ]
+
+    def prepare(self, reactor, clock, hs):
+        # build a replication server
+        self.server_factory = ReplicationStreamProtocolFactory(hs)
+        self.streamer = hs.get_replication_streamer()
+
+        store = hs.get_datastore()
+        self.database = store.db
+
+        self.reactor.lookups["testserv"] = "1.2.3.4"
+
+    def default_config(self):
+        conf = super().default_config()
+        conf["send_federation"] = False
+        return conf
+
+    def make_worker_hs(self, extra_config={}):
+        config = self._get_worker_hs_config()
+        config.update(extra_config)
+
+        mock_federation_client = Mock(spec=["put_json"])
+        mock_federation_client.put_json.side_effect = lambda *_, **__: defer.succeed({})
+
+        worker_hs = self.setup_test_homeserver(
+            http_client=mock_federation_client,
+            homeserverToUse=GenericWorkerServer,
+            config=config,
+            reactor=self.reactor,
+        )
+
+        store = worker_hs.get_datastore()
+        store.db._db_pool = self.database._db_pool
+
+        repl_handler = ReplicationCommandHandler(worker_hs)
+        client = ClientReplicationStreamProtocol(
+            worker_hs, "client", "test", self.clock, repl_handler,
+        )
+        server = self.server_factory.buildProtocol(None)
+
+        client_transport = FakeTransport(server, self.reactor)
+        client.makeConnection(client_transport)
+
+        server_transport = FakeTransport(client, self.reactor)
+        server.makeConnection(server_transport)
+
+        return worker_hs
+
+    def _get_worker_hs_config(self) -> dict:
+        config = self.default_config()
+        config["worker_app"] = "synapse.app.federation_sender"
+        config["worker_replication_host"] = "testserv"
+        config["worker_replication_http_port"] = "8765"
+        return config
+
+    def replicate(self):
+        """Tell the master side of replication that something has happened, and then
+        wait for the replication to occur.
+        """
+        self.streamer.on_notifier_poke()
+        self.pump()
+
+    def create_room_with_remote_server(self, user, token, remote_server="other_server"):
+        room = self.helper.create_room_as(user, tok=token)
+        store = self.hs.get_datastore()
+        federation = self.hs.get_handlers().federation_handler
+
+        prev_event_ids = self.get_success(store.get_latest_event_ids_in_room(room))
+        room_version = self.get_success(store.get_room_version(room))
+
+        factory = EventBuilderFactory(self.hs)
+        factory.hostname = remote_server
+
+        user_id = UserID("user", remote_server).to_string()
+
+        event_dict = {
+            "type": EventTypes.Member,
+            "state_key": user_id,
+            "content": {"membership": Membership.JOIN},
+            "sender": user_id,
+            "room_id": room,
+        }
+
+        builder = factory.for_room_version(room_version, event_dict)
+        join_event = self.get_success(builder.build(prev_event_ids))
+
+        self.get_success(federation.on_send_join_request(remote_server, join_event))
+        self.replicate()
+
+        return room
+
+
+class FederationSenderTestCase(BaseStreamTestCase):
+    servlets = [
+        login.register_servlets,
+        register_servlets_for_client_rest_resource,
+        room.register_servlets,
+    ]
+
+    def test_send_event_single_sender(self):
+        """Test that using a single federation sender worker correctly sends a
+        new event.
+        """
+        worker_hs = self.make_worker_hs({"send_federation": True})
+        mock_client = worker_hs.get_http_client()
+
+        user = self.register_user("user", "pass")
+        token = self.login("user", "pass")
+
+        room = self.create_room_with_remote_server(user, token)
+
+        mock_client.put_json.reset_mock()
+
+        self.create_and_send_event(room, UserID.from_string(user))
+        self.replicate()
+
+        # Assert that the event was sent out over federation.
+        mock_client.put_json.assert_called()
+        self.assertEqual(mock_client.put_json.call_args[0][0], "other_server")
+        self.assertTrue(mock_client.put_json.call_args[1]["data"].get("pdus"))
+
+    def test_send_event_sharded(self):
+        """Test that using two federation sender workers correctly sends
+        new events.
+        """
+        worker1 = self.make_worker_hs(
+            {
+                "send_federation": True,
+                "worker_name": "sender1",
+                "federation_sender_instances": ["sender1", "sender2"],
+            }
+        )
+        mock_client1 = worker1.get_http_client()
+
+        worker2 = self.make_worker_hs(
+            {
+                "send_federation": True,
+                "worker_name": "sender2",
+                "federation_sender_instances": ["sender1", "sender2"],
+            }
+        )
+        mock_client2 = worker2.get_http_client()
+
+        user = self.register_user("user2", "pass")
+        token = self.login("user2", "pass")
+
+        sent_on_1 = False
+        sent_on_2 = False
+        for i in range(20):
+            server_name = "other_server_%d" % (i,)
+            room = self.create_room_with_remote_server(user, token, server_name)
+            mock_client1.reset_mock()
+            mock_client2.reset_mock()
+
+            self.create_and_send_event(room, UserID.from_string(user))
+            self.replicate()
+
+            if mock_client1.put_json.called:
+                sent_on_1 = True
+                mock_client2.put_json.assert_not_called()
+                self.assertEqual(mock_client1.put_json.call_args[0][0], server_name)
+                self.assertTrue(mock_client1.put_json.call_args[1]["data"].get("pdus"))
+            elif mock_client2.put_json.called:
+                sent_on_2 = True
+                mock_client1.put_json.assert_not_called()
+                self.assertEqual(mock_client2.put_json.call_args[0][0], server_name)
+                self.assertTrue(mock_client2.put_json.call_args[1]["data"].get("pdus"))
+            else:
+                raise AssertionError(
+                    "Expected send transaction from one or the other sender"
+                )
+
+            if sent_on_1 and sent_on_2:
+                break
+
+        self.assertTrue(sent_on_1)
+        self.assertTrue(sent_on_2)
+
+    def test_send_typing_sharded(self):
+        """Test that using two federation sender workers correctly sends
+        new typing EDUs.
+        """
+        worker1 = self.make_worker_hs(
+            {
+                "send_federation": True,
+                "worker_name": "sender1",
+                "federation_sender_instances": ["sender1", "sender2"],
+            }
+        )
+        mock_client1 = worker1.get_http_client()
+
+        worker2 = self.make_worker_hs(
+            {
+                "send_federation": True,
+                "worker_name": "sender2",
+                "federation_sender_instances": ["sender1", "sender2"],
+            }
+        )
+        mock_client2 = worker2.get_http_client()
+
+        user = self.register_user("user3", "pass")
+        token = self.login("user3", "pass")
+
+        typing_handler = self.hs.get_typing_handler()
+
+        sent_on_1 = False
+        sent_on_2 = False
+        for i in range(20):
+            server_name = "other_server_%d" % (i,)
+            room = self.create_room_with_remote_server(user, token, server_name)
+            mock_client1.reset_mock()
+            mock_client2.reset_mock()
+
+            self.get_success(
+                typing_handler.started_typing(
+                    target_user=UserID.from_string(user),
+                    auth_user=UserID.from_string(user),
+                    room_id=room,
+                    timeout=20000,
+                )
+            )
+
+            self.replicate()
+
+            if mock_client1.put_json.called:
+                sent_on_1 = True
+                mock_client2.put_json.assert_not_called()
+                self.assertEqual(mock_client1.put_json.call_args[0][0], server_name)
+                self.assertTrue(mock_client1.put_json.call_args[1]["data"].get("edus"))
+            elif mock_client2.put_json.called:
+                sent_on_2 = True
+                mock_client1.put_json.assert_not_called()
+                self.assertEqual(mock_client2.put_json.call_args[0][0], server_name)
+                self.assertTrue(mock_client2.put_json.call_args[1]["data"].get("edus"))
+            else:
+                raise AssertionError(
+                    "Expected send transaction from one or the other sender"
+                )
+
+            if sent_on_1 and sent_on_2:
+                break
+
+        self.assertTrue(sent_on_1)
+        self.assertTrue(sent_on_2)
				`@ -0,0 +1 @@`
				`Add experimental support for running multiple federation sender processes.`