mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-21 17:15:38 +03:00
Improve lock performance when a lot of locks are waiting (#16840)
When a lot of locks are waiting for a single lock, notifying all locks independently with `call_later` on each release is really costly and incurs some kind of async contention, where the CPU is spinning a lot for not much. The included test is taking around 30s before the change, and 0.5s after. It was found following failing tests with https://github.com/element-hq/synapse/pull/16827.
This commit is contained in:
parent
a111ba0207
commit
cb562d73aa
4 changed files with 73 additions and 6 deletions
1
changelog.d/16840.misc
Normal file
1
changelog.d/16840.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Improve lock performance when a lot of locks are all waiting for a single lock to be released.
|
|
@ -182,12 +182,15 @@ class WorkerLocksHandler:
|
||||||
if not locks:
|
if not locks:
|
||||||
return
|
return
|
||||||
|
|
||||||
def _wake_deferred(deferred: defer.Deferred) -> None:
|
def _wake_all_locks(
|
||||||
if not deferred.called:
|
locks: Collection[Union[WaitingLock, WaitingMultiLock]]
|
||||||
deferred.callback(None)
|
) -> None:
|
||||||
|
for lock in locks:
|
||||||
|
deferred = lock.deferred
|
||||||
|
if not deferred.called:
|
||||||
|
deferred.callback(None)
|
||||||
|
|
||||||
for lock in locks:
|
self._clock.call_later(0, _wake_all_locks, locks)
|
||||||
self._clock.call_later(0, _wake_deferred, lock.deferred)
|
|
||||||
|
|
||||||
@wrap_as_background_process("_cleanup_locks")
|
@wrap_as_background_process("_cleanup_locks")
|
||||||
async def _cleanup_locks(self) -> None:
|
async def _cleanup_locks(self) -> None:
|
||||||
|
|
|
@ -27,6 +27,7 @@ from synapse.util import Clock
|
||||||
|
|
||||||
from tests import unittest
|
from tests import unittest
|
||||||
from tests.replication._base import BaseMultiWorkerStreamTestCase
|
from tests.replication._base import BaseMultiWorkerStreamTestCase
|
||||||
|
from tests.utils import test_timeout
|
||||||
|
|
||||||
|
|
||||||
class WorkerLockTestCase(unittest.HomeserverTestCase):
|
class WorkerLockTestCase(unittest.HomeserverTestCase):
|
||||||
|
@ -50,6 +51,28 @@ class WorkerLockTestCase(unittest.HomeserverTestCase):
|
||||||
self.get_success(d2)
|
self.get_success(d2)
|
||||||
self.get_success(lock2.__aexit__(None, None, None))
|
self.get_success(lock2.__aexit__(None, None, None))
|
||||||
|
|
||||||
|
def test_lock_contention(self) -> None:
|
||||||
|
"""Test lock contention when a lot of locks wait on a single worker"""
|
||||||
|
|
||||||
|
# It takes around 0.5s on a 5+ years old laptop
|
||||||
|
with test_timeout(5):
|
||||||
|
nb_locks = 500
|
||||||
|
d = self._take_locks(nb_locks)
|
||||||
|
self.assertEqual(self.get_success(d), nb_locks)
|
||||||
|
|
||||||
|
async def _take_locks(self, nb_locks: int) -> int:
|
||||||
|
locks = [
|
||||||
|
self.hs.get_worker_locks_handler().acquire_lock("test_lock", "")
|
||||||
|
for _ in range(nb_locks)
|
||||||
|
]
|
||||||
|
|
||||||
|
nb_locks_taken = 0
|
||||||
|
for lock in locks:
|
||||||
|
async with lock:
|
||||||
|
nb_locks_taken += 1
|
||||||
|
|
||||||
|
return nb_locks_taken
|
||||||
|
|
||||||
|
|
||||||
class WorkerLockWorkersTestCase(BaseMultiWorkerStreamTestCase):
|
class WorkerLockWorkersTestCase(BaseMultiWorkerStreamTestCase):
|
||||||
def prepare(
|
def prepare(
|
||||||
|
|
|
@ -21,7 +21,20 @@
|
||||||
|
|
||||||
import atexit
|
import atexit
|
||||||
import os
|
import os
|
||||||
from typing import Any, Callable, Dict, List, Tuple, Type, TypeVar, Union, overload
|
import signal
|
||||||
|
from types import FrameType, TracebackType
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Callable,
|
||||||
|
Dict,
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
Tuple,
|
||||||
|
Type,
|
||||||
|
TypeVar,
|
||||||
|
Union,
|
||||||
|
overload,
|
||||||
|
)
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
from typing_extensions import Literal, ParamSpec
|
from typing_extensions import Literal, ParamSpec
|
||||||
|
@ -379,3 +392,30 @@ def checked_cast(type: Type[T], x: object) -> T:
|
||||||
"""
|
"""
|
||||||
assert isinstance(x, type)
|
assert isinstance(x, type)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeout(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class test_timeout:
|
||||||
|
def __init__(self, seconds: int, error_message: Optional[str] = None) -> None:
|
||||||
|
if error_message is None:
|
||||||
|
error_message = "test timed out after {}s.".format(seconds)
|
||||||
|
self.seconds = seconds
|
||||||
|
self.error_message = error_message
|
||||||
|
|
||||||
|
def handle_timeout(self, signum: int, frame: Optional[FrameType]) -> None:
|
||||||
|
raise TestTimeout(self.error_message)
|
||||||
|
|
||||||
|
def __enter__(self) -> None:
|
||||||
|
signal.signal(signal.SIGALRM, self.handle_timeout)
|
||||||
|
signal.alarm(self.seconds)
|
||||||
|
|
||||||
|
def __exit__(
|
||||||
|
self,
|
||||||
|
exc_type: Optional[Type[BaseException]],
|
||||||
|
exc_val: Optional[BaseException],
|
||||||
|
exc_tb: Optional[TracebackType],
|
||||||
|
) -> None:
|
||||||
|
signal.alarm(0)
|
||||||
|
|
Loading…
Reference in a new issue