From 985953f5287fb791c976ab16bee481c8e3280881 Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Fri, 4 Oct 2019 12:05:24 +0100 Subject: [PATCH] fix misshandling mau reaping where reserved users are present --- synapse/storage/monthly_active_users.py | 58 +++++++++++++++------- tests/storage/test_monthly_active_users.py | 49 ++++++++++++++++++ 2 files changed, 89 insertions(+), 18 deletions(-) diff --git a/synapse/storage/monthly_active_users.py b/synapse/storage/monthly_active_users.py index 752e9788a2..0df2119258 100644 --- a/synapse/storage/monthly_active_users.py +++ b/synapse/storage/monthly_active_users.py @@ -97,6 +97,7 @@ class MonthlyActiveUsersStore(SQLBaseStore): txn.execute(sql, query_args) + max_mau_value = self.hs.config.max_mau_value if self.hs.config.limit_usage_by_mau: # If MAU user count still exceeds the MAU threshold, then delete on # a least recently active basis. @@ -106,29 +107,44 @@ class MonthlyActiveUsersStore(SQLBaseStore): # While Postgres does not require 'LIMIT', but also does not support # negative LIMIT values. So there is no way to write it that both can # support - safe_guard = self.hs.config.max_mau_value - len(self.reserved_users) - # Must be greater than zero for postgres - safe_guard = safe_guard if safe_guard > 0 else 0 - query_args = [safe_guard] - - base_sql = """ - DELETE FROM monthly_active_users - WHERE user_id NOT IN ( - SELECT user_id FROM monthly_active_users - ORDER BY timestamp DESC - LIMIT ? + if len(self.reserved_users) == 0: + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + ORDER BY timestamp DESC + LIMIT ? ) - """ + """ + txn.execute(sql, (max_mau_value,)) # Need if/else since 'AND user_id NOT IN ({})' fails on Postgres # when len(reserved_users) == 0. Works fine on sqlite. - if len(self.reserved_users) > 0: - query_args.extend(self.reserved_users) - sql = base_sql + """ AND user_id NOT IN ({})""".format( + else: + safe_guard = max_mau_value - len(self.reserved_users) + # Must be greater than zero for postgres + safe_guard = safe_guard if safe_guard > 0 else 0 + # It is important to filter reserved users twice to guard + # against the case where the reserved user is present in the + # SELECT, meaning that a legitmate mau is deleted. + sql = """ + DELETE FROM monthly_active_users + WHERE user_id NOT IN ( + SELECT user_id FROM monthly_active_users + WHERE user_id NOT IN ({})""".format( + ",".join(questionmarks) + ) + """ + ORDER BY timestamp DESC + LIMIT ? + ) + AND user_id NOT IN ({})""".format( ",".join(questionmarks) ) - else: - sql = base_sql - txn.execute(sql, query_args) + query_args = [] + query_args.extend(self.reserved_users) + query_args.append(safe_guard) + query_args.extend(self.reserved_users) + + txn.execute(sql, query_args) yield self.runInteraction("reap_monthly_active_users", _reap_users) # It seems poor to invalidate the whole cache, Postgres supports @@ -167,12 +183,18 @@ class MonthlyActiveUsersStore(SQLBaseStore): Defered[int]: Number of real reserved users """ count = 0 + users = () for tp in self.hs.config.mau_limits_reserved_threepids: user_id = yield self.hs.get_datastore().get_user_id_by_threepid( tp["medium"], tp["address"] ) if user_id: count = count + 1 + users = users + (user_id,) + + # Update reserved_users to ensure it stays in sync, this is important + # for reaping. + self.reserved_users = users return count @defer.inlineCallbacks diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index 1494650d10..62aa2bfed7 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -158,6 +158,54 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): count = self.store.get_monthly_active_count() self.assertEquals(self.get_success(count), 0) + def test_reap_monthly_active_users_reserved_users(self): + """ Tests that reaping correctly handles reaping where reserved users are + present""" + + self.hs.config.max_mau_value = 5 + initial_users = 5 + reserved_user_number = initial_users - 1 + threepids = [] + for i in range(initial_users): + user = "@user%d:server" % i + email = "user%d@example.com" % i + self.store.upsert_monthly_active_user(user) + threepids.append({"medium": "email", "address": email},) + # Need to ensure that the most recent entries in the + # monthly_active_users table are reserved + now = int(self.hs.get_clock().time_msec()) + if i != 0: + self.store.register_user(user_id=user, password_hash=None) + self.pump() + self.store.user_add_threepid(user, "email", email, now, now) + + self.hs.config.mau_limits_reserved_threepids = threepids + self.store.runInteraction( + "initialise", self.store._initialise_reserved_users, threepids + ) + self.pump() + count = self.store.get_monthly_active_count() + self.assertTrue(self.get_success(count), initial_users) + + count = self.store.get_registered_reserved_users_count() + self.assertEquals(self.get_success(count), reserved_user_number) + + self.store.reap_monthly_active_users() + self.pump() + count = self.store.get_monthly_active_count() + self.assertEquals( + self.get_success(count), self.hs.config.max_mau_value + ) + + def _populate_reserved_users(self, user, email): + """Helper function that registers userpopulates reserved users""" + # Test reserved registed users + self.store.register_user(user_id=user, password_hash=None) + self.pump() + + now = int(self.hs.get_clock().time_msec()) + self.store.user_add_threepid(user, "email", email, now, now) + def test_populate_monthly_users_is_guest(self): # Test that guest users are not added to mau list user_id = "@user_id:host" @@ -198,6 +246,7 @@ class MonthlyActiveUsersTestCase(unittest.HomeserverTestCase): user1 = "@user1:example.com" user2 = "@user2:example.com" + user1_email = "user1@example.com" user2_email = "user2@example.com" threepids = [