Servers-known-about statistic (#5981)

This commit is contained in:
Amber Brown 2019-09-07 01:45:51 +10:00 committed by GitHub
parent 78801e7f9e
commit 55d5b3af88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 225 additions and 59 deletions

1
changelog.d/5981.feature Normal file
View file

@ -0,0 +1 @@
Setting metrics_flags.known_servers to True in the configuration will publish the synapse_federation_known_servers metric over Prometheus. This represents the total number of servers your server knows about (i.e. is in rooms with), including itself.

View file

@ -958,6 +958,16 @@ account_threepid_delegates:
#sentry: #sentry:
# dsn: "..." # dsn: "..."
# Flags to enable Prometheus metrics which are not suitable to be
# enabled by default, either for performance reasons or limited use.
#
metrics_flags:
# Publish synapse_federation_known_servers, a g auge of the number of
# servers this homeserver knows about, including itself. May cause
# performance problems on large homeservers.
#
#known_servers: true
# Whether or not to report anonymized homeserver usage statistics. # Whether or not to report anonymized homeserver usage statistics.
# report_stats: true|false # report_stats: true|false

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd # Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -13,6 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import attr
from ._base import Config, ConfigError from ._base import Config, ConfigError
MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentry
@ -20,6 +23,18 @@ MISSING_SENTRY = """Missing sentry-sdk library. This is required to enable sentr
""" """
@attr.s
class MetricsFlags(object):
known_servers = attr.ib(default=False, validator=attr.validators.instance_of(bool))
@classmethod
def all_off(cls):
"""
Instantiate the flags with all options set to off.
"""
return cls(**{x.name: False for x in attr.fields(cls)})
class MetricsConfig(Config): class MetricsConfig(Config):
def read_config(self, config, **kwargs): def read_config(self, config, **kwargs):
self.enable_metrics = config.get("enable_metrics", False) self.enable_metrics = config.get("enable_metrics", False)
@ -27,6 +42,12 @@ class MetricsConfig(Config):
self.metrics_port = config.get("metrics_port") self.metrics_port = config.get("metrics_port")
self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1")
if self.enable_metrics:
_metrics_config = config.get("metrics_flags") or {}
self.metrics_flags = MetricsFlags(**_metrics_config)
else:
self.metrics_flags = MetricsFlags.all_off()
self.sentry_enabled = "sentry" in config self.sentry_enabled = "sentry" in config
if self.sentry_enabled: if self.sentry_enabled:
try: try:
@ -58,6 +79,16 @@ class MetricsConfig(Config):
#sentry: #sentry:
# dsn: "..." # dsn: "..."
# Flags to enable Prometheus metrics which are not suitable to be
# enabled by default, either for performance reasons or limited use.
#
metrics_flags:
# Publish synapse_federation_known_servers, a g auge of the number of
# servers this homeserver knows about, including itself. May cause
# performance problems on large homeservers.
#
#known_servers: true
# Whether or not to report anonymized homeserver usage statistics. # Whether or not to report anonymized homeserver usage statistics.
""" """

View file

@ -24,8 +24,10 @@ from canonicaljson import json
from twisted.internet import defer from twisted.internet import defer
from synapse.api.constants import EventTypes, Membership from synapse.api.constants import EventTypes, Membership
from synapse.metrics import LaterGauge
from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage._base import LoggingTransaction from synapse.storage._base import LoggingTransaction
from synapse.storage.engines import Sqlite3Engine
from synapse.storage.events_worker import EventsWorkerStore from synapse.storage.events_worker import EventsWorkerStore
from synapse.types import get_domain_from_id from synapse.types import get_domain_from_id
from synapse.util.async_helpers import Linearizer from synapse.util.async_helpers import Linearizer
@ -74,6 +76,63 @@ class RoomMemberWorkerStore(EventsWorkerStore):
self._check_safe_current_state_events_membership_updated_txn(txn) self._check_safe_current_state_events_membership_updated_txn(txn)
txn.close() txn.close()
if self.hs.config.metrics_flags.known_servers:
self._known_servers_count = 1
self.hs.get_clock().looping_call(
run_as_background_process,
60 * 1000,
"_count_known_servers",
self._count_known_servers,
)
self.hs.get_clock().call_later(
1000,
run_as_background_process,
"_count_known_servers",
self._count_known_servers,
)
LaterGauge(
"synapse_federation_known_servers",
"",
[],
lambda: self._known_servers_count,
)
@defer.inlineCallbacks
def _count_known_servers(self):
"""
Count the servers that this server knows about.
The statistic is stored on the class for the
`synapse_federation_known_servers` LaterGauge to collect.
"""
def _transact(txn):
if isinstance(self.database_engine, Sqlite3Engine):
query = """
SELECT COUNT(DISTINCT substr(out.user_id, pos+1))
FROM (
SELECT rm.user_id as user_id, instr(rm.user_id, ':')
AS pos FROM room_memberships as rm
INNER JOIN current_state_events as c ON rm.event_id = c.event_id
WHERE c.type = 'm.room.member'
) as out
"""
else:
query = """
SELECT COUNT(DISTINCT split_part(state_key, ':', 2))
FROM current_state_events
WHERE type = 'm.room.member' AND membership = 'join';
"""
txn.execute(query)
return list(txn)[0][0]
count = yield self.runInteraction("get_known_servers", _transact)
# We always know about ourselves, even if we have nothing in
# room_memberships (for example, the server is new).
self._known_servers_count = max([count, 1])
return self._known_servers_count
def _check_safe_current_state_events_membership_updated_txn(self, txn): def _check_safe_current_state_events_membership_updated_txn(self, txn):
"""Checks if it is safe to assume the new current_state_events """Checks if it is safe to assume the new current_state_events
membership column is up to date membership column is up to date

View file

@ -17,6 +17,8 @@ import os.path
import re import re
import shutil import shutil
import tempfile import tempfile
from contextlib import redirect_stdout
from io import StringIO
from synapse.config.homeserver import HomeServerConfig from synapse.config.homeserver import HomeServerConfig
@ -32,17 +34,18 @@ class ConfigGenerationTestCase(unittest.TestCase):
shutil.rmtree(self.dir) shutil.rmtree(self.dir)
def test_generate_config_generates_files(self): def test_generate_config_generates_files(self):
HomeServerConfig.load_or_generate_config( with redirect_stdout(StringIO()):
"", HomeServerConfig.load_or_generate_config(
[ "",
"--generate-config", [
"-c", "--generate-config",
self.file, "-c",
"--report-stats=yes", self.file,
"-H", "--report-stats=yes",
"lemurs.win", "-H",
], "lemurs.win",
) ],
)
self.assertSetEqual( self.assertSetEqual(
set(["homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"]), set(["homeserver.yaml", "lemurs.win.log.config", "lemurs.win.signing.key"]),

View file

@ -15,6 +15,8 @@
import os.path import os.path
import shutil import shutil
import tempfile import tempfile
from contextlib import redirect_stdout
from io import StringIO
import yaml import yaml
@ -26,7 +28,6 @@ from tests import unittest
class ConfigLoadingTestCase(unittest.TestCase): class ConfigLoadingTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.dir = tempfile.mkdtemp() self.dir = tempfile.mkdtemp()
print(self.dir)
self.file = os.path.join(self.dir, "homeserver.yaml") self.file = os.path.join(self.dir, "homeserver.yaml")
def tearDown(self): def tearDown(self):
@ -94,18 +95,27 @@ class ConfigLoadingTestCase(unittest.TestCase):
) )
self.assertTrue(config.enable_registration) self.assertTrue(config.enable_registration)
def test_stats_enabled(self):
self.generate_config_and_remove_lines_containing("enable_metrics")
self.add_lines_to_config(["enable_metrics: true"])
# The default Metrics Flags are off by default.
config = HomeServerConfig.load_config("", ["-c", self.file])
self.assertFalse(config.metrics_flags.known_servers)
def generate_config(self): def generate_config(self):
HomeServerConfig.load_or_generate_config( with redirect_stdout(StringIO()):
"", HomeServerConfig.load_or_generate_config(
[ "",
"--generate-config", [
"-c", "--generate-config",
self.file, "-c",
"--report-stats=yes", self.file,
"-H", "--report-stats=yes",
"lemurs.win", "-H",
], "lemurs.win",
) ],
)
def generate_config_and_remove_lines_containing(self, needle): def generate_config_and_remove_lines_containing(self, needle):
self.generate_config() self.generate_config()

View file

@ -1,5 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd # Copyright 2014-2016 OpenMarket Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -13,78 +14,129 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from unittest.mock import Mock
from mock import Mock
from twisted.internet import defer
from synapse.api.constants import EventTypes, Membership from synapse.api.constants import EventTypes, Membership
from synapse.api.room_versions import RoomVersions from synapse.api.room_versions import RoomVersions
from synapse.types import Requester, RoomID, UserID from synapse.rest.admin import register_servlets_for_client_rest_resource
from synapse.rest.client.v1 import login, room
from synapse.types import Requester, UserID
from tests import unittest from tests import unittest
from tests.utils import create_room, setup_test_homeserver
class RoomMemberStoreTestCase(unittest.TestCase): class RoomMemberStoreTestCase(unittest.HomeserverTestCase):
@defer.inlineCallbacks
def setUp(self): servlets = [
hs = yield setup_test_homeserver( login.register_servlets,
self.addCleanup, resource_for_federation=Mock(), http_client=None register_servlets_for_client_rest_resource,
room.register_servlets,
]
def make_homeserver(self, reactor, clock):
hs = self.setup_test_homeserver(
resource_for_federation=Mock(), http_client=None
) )
return hs
def prepare(self, reactor, clock, hs):
# We can't test the RoomMemberStore on its own without the other event # We can't test the RoomMemberStore on its own without the other event
# storage logic # storage logic
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.event_builder_factory = hs.get_event_builder_factory() self.event_builder_factory = hs.get_event_builder_factory()
self.event_creation_handler = hs.get_event_creation_handler() self.event_creation_handler = hs.get_event_creation_handler()
self.u_alice = UserID.from_string("@alice:test") self.u_alice = self.register_user("alice", "pass")
self.u_bob = UserID.from_string("@bob:test") self.t_alice = self.login("alice", "pass")
self.u_bob = self.register_user("bob", "pass")
# User elsewhere on another host # User elsewhere on another host
self.u_charlie = UserID.from_string("@charlie:elsewhere") self.u_charlie = UserID.from_string("@charlie:elsewhere")
self.room = RoomID.from_string("!abc123:test")
yield create_room(hs, self.room.to_string(), self.u_alice.to_string())
@defer.inlineCallbacks
def inject_room_member(self, room, user, membership, replaces_state=None): def inject_room_member(self, room, user, membership, replaces_state=None):
builder = self.event_builder_factory.for_room_version( builder = self.event_builder_factory.for_room_version(
RoomVersions.V1, RoomVersions.V1,
{ {
"type": EventTypes.Member, "type": EventTypes.Member,
"sender": user.to_string(), "sender": user,
"state_key": user.to_string(), "state_key": user,
"room_id": room.to_string(), "room_id": room,
"content": {"membership": membership}, "content": {"membership": membership},
}, },
) )
event, context = yield self.event_creation_handler.create_new_client_event( event, context = self.get_success(
builder self.event_creation_handler.create_new_client_event(builder)
) )
yield self.store.persist_event(event, context) self.get_success(self.store.persist_event(event, context))
return event return event
@defer.inlineCallbacks
def test_one_member(self): def test_one_member(self):
yield self.inject_room_member(self.room, self.u_alice, Membership.JOIN)
self.assertEquals( # Alice creates the room, and is automatically joined
[self.room.to_string()], self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice)
[
m.room_id rooms_for_user = self.get_success(
for m in ( self.store.get_rooms_for_user_where_membership_is(
yield self.store.get_rooms_for_user_where_membership_is( self.u_alice, [Membership.JOIN]
self.u_alice.to_string(), [Membership.JOIN] )
)
)
],
) )
self.assertEquals([self.room], [m.room_id for m in rooms_for_user])
def test_count_known_servers(self):
"""
_count_known_servers will calculate how many servers are in a room.
"""
self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice)
self.inject_room_member(self.room, self.u_bob, Membership.JOIN)
self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN)
servers = self.get_success(self.store._count_known_servers())
self.assertEqual(servers, 2)
def test_count_known_servers_stat_counter_disabled(self):
"""
If enabled, the metrics for how many servers are known will be counted.
"""
self.assertTrue("_known_servers_count" not in self.store.__dict__.keys())
self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice)
self.inject_room_member(self.room, self.u_bob, Membership.JOIN)
self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN)
self.pump(20)
self.assertTrue("_known_servers_count" not in self.store.__dict__.keys())
@unittest.override_config(
{"enable_metrics": True, "metrics_flags": {"known_servers": True}}
)
def test_count_known_servers_stat_counter_enabled(self):
"""
If enabled, the metrics for how many servers are known will be counted.
"""
# Initialises to 1 -- itself
self.assertEqual(self.store._known_servers_count, 1)
self.pump(20)
# No rooms have been joined, so technically the SQL returns 0, but it
# will still say it knows about itself.
self.assertEqual(self.store._known_servers_count, 1)
self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice)
self.inject_room_member(self.room, self.u_bob, Membership.JOIN)
self.inject_room_member(self.room, self.u_charlie.to_string(), Membership.JOIN)
self.pump(20)
# It now knows about Charlie's server.
self.assertEqual(self.store._known_servers_count, 2)
class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase):
def prepare(self, reactor, clock, homeserver): def prepare(self, reactor, clock, homeserver):