Mirror of https://github.com/element-hq/synapse.git (synced 2024-11-29 07:28:55 +03:00)
Track why we're evicting from caches (#10829)
So we can distinguish between "evicting because the cache is too big" and "evicting because the cache entries haven't been recently used".
parent 51e2db3598
commit a2d7195e01
4 changed files with 31 additions and 15 deletions
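The change is easy to see in isolation: instead of a single evicted_size counter per cache, evictions are tallied in a collections.Counter keyed by an EvictionReason enum, so "too big" and "not recently used" evictions accumulate separately. The following is a minimal standalone sketch of that pattern, assuming nothing from Synapse itself; ToyCacheMetric and the sample numbers are invented for illustration.

import collections
import typing
from enum import Enum, auto


class EvictionReason(Enum):
    size = auto()  # evicted because the cache grew past its maximum size
    time = auto()  # evicted because the entry expired / wasn't used recently


class ToyCacheMetric:
    """Minimal stand-in for a per-cache metrics object (illustration only)."""

    def __init__(self) -> None:
        # One running total per reason, instead of a single evicted_size int.
        self.eviction_size_by_reason: typing.Counter[EvictionReason] = (
            collections.Counter()
        )

    def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
        self.eviction_size_by_reason[reason] += size


metrics = ToyCacheMetric()
metrics.inc_evictions(EvictionReason.size, 3)  # cache was over capacity
metrics.inc_evictions(EvictionReason.time)     # an entry expired
print(dict(metrics.eviction_size_by_reason))
# e.g. {<EvictionReason.size: 1>: 3, <EvictionReason.time: 2>: 1}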
changelog.d/10829.misc (new file, 1 addition)
@@ -0,0 +1 @@
+Track cache eviction rates more finely in Prometheus' monitoring.
synapse/util/caches/__init__.py
@@ -12,8 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import collections
 import logging
+import typing
+from enum import Enum, auto
 from sys import intern
 from typing import Callable, Dict, Optional, Sized

@@ -34,7 +36,7 @@ collectors_by_name: Dict[str, "CacheMetric"] = {}

 cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
-cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
+cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"])
 cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
 cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
 cache_memory_usage = Gauge(
@@ -46,11 +48,16 @@ cache_memory_usage = Gauge(
 response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
 response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
 response_cache_evicted = Gauge(
-    "synapse_util_caches_response_cache:evicted_size", "", ["name"]
+    "synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"]
 )
 response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])


+class EvictionReason(Enum):
+    size = auto()
+    time = auto()
+
+
 @attr.s(slots=True)
 class CacheMetric:

@@ -61,7 +68,9 @@ class CacheMetric:

     hits = attr.ib(default=0)
     misses = attr.ib(default=0)
-    evicted_size = attr.ib(default=0)
+    eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib(
+        factory=collections.Counter
+    )
     memory_usage = attr.ib(default=None)

     def inc_hits(self) -> None:
@@ -70,8 +79,8 @@ class CacheMetric:
     def inc_misses(self) -> None:
         self.misses += 1

-    def inc_evictions(self, size: int = 1) -> None:
-        self.evicted_size += size
+    def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
+        self.eviction_size_by_reason[reason] += size

     def inc_memory_usage(self, memory: int) -> None:
         if self.memory_usage is None:
@@ -94,14 +103,20 @@ class CacheMetric:
             if self._cache_type == "response_cache":
                 response_cache_size.labels(self._cache_name).set(len(self._cache))
                 response_cache_hits.labels(self._cache_name).set(self.hits)
-                response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                for reason in EvictionReason:
+                    response_cache_evicted.labels(self._cache_name, reason.name).set(
+                        self.eviction_size_by_reason[reason]
+                    )
                 response_cache_total.labels(self._cache_name).set(
                     self.hits + self.misses
                 )
             else:
                 cache_size.labels(self._cache_name).set(len(self._cache))
                 cache_hits.labels(self._cache_name).set(self.hits)
-                cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                for reason in EvictionReason:
+                    cache_evicted.labels(self._cache_name, reason.name).set(
+                        self.eviction_size_by_reason[reason]
+                    )
                 cache_total.labels(self._cache_name).set(self.hits + self.misses)
                 if getattr(self._cache, "max_size", None):
                     cache_max_size.labels(self._cache_name).set(self._cache.max_size)
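The Gauge objects above appear to be prometheus_client gauges, so adding "reason" to the label list means each cache now exports one evicted_size sample per (name, reason) pair. Below is a hedged, self-contained sketch of what that looks like; the private registry, the getEvent cache name and the sample values are invented for the example.

import collections
from enum import Enum, auto

from prometheus_client import CollectorRegistry, Gauge, generate_latest


class EvictionReason(Enum):
    size = auto()
    time = auto()


# Private registry so the example doesn't touch the process-wide default one.
registry = CollectorRegistry()
cache_evicted = Gauge(
    "synapse_util_caches_cache:evicted_size", "", ["name", "reason"], registry=registry
)

# Pretend per-reason bookkeeping for one cache, in the shape the diff keeps in
# CacheMetric.eviction_size_by_reason.
eviction_size_by_reason = collections.Counter(
    {EvictionReason.size: 42, EvictionReason.time: 7}
)

# Mirrors the collect() loop above: one labelled sample per eviction reason.
for reason in EvictionReason:
    cache_evicted.labels("getEvent", reason.name).set(eviction_size_by_reason[reason])

print(generate_latest(registry).decode())
# The output includes lines along the lines of:
#   synapse_util_caches_cache:evicted_size{name="getEvent",reason="size"} 42.0
#   synapse_util_caches_cache:evicted_size{name="getEvent",reason="time"} 7.0

Because reason is an ordinary label, dashboards that want the old single total can simply sum the series over it.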
synapse/util/caches/expiringcache.py
@@ -22,7 +22,7 @@ from typing_extensions import Literal
 from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util import Clock
-from synapse.util.caches import register_cache
+from synapse.util.caches import EvictionReason, register_cache

 logger = logging.getLogger(__name__)

@@ -98,9 +98,9 @@ class ExpiringCache(Generic[KT, VT]):
         while self._max_size and len(self) > self._max_size:
             _key, value = self._cache.popitem(last=False)
             if self.iterable:
-                self.metrics.inc_evictions(len(value.value))
+                self.metrics.inc_evictions(EvictionReason.size, len(value.value))
             else:
-                self.metrics.inc_evictions()
+                self.metrics.inc_evictions(EvictionReason.size)

     def __getitem__(self, key: KT) -> VT:
         try:
@@ -175,9 +175,9 @@ class ExpiringCache(Generic[KT, VT]):
         for k in keys_to_delete:
             value = self._cache.pop(k)
             if self.iterable:
-                self.metrics.inc_evictions(len(value.value))
+                self.metrics.inc_evictions(EvictionReason.time, len(value.value))
             else:
-                self.metrics.inc_evictions()
+                self.metrics.inc_evictions(EvictionReason.time)

         logger.debug(
             "[%s] _prune_cache before: %d, after len: %d",
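In ExpiringCache the two reasons map directly onto the two eviction paths above: the capacity loop (while self._max_size and len(self) > self._max_size) now reports EvictionReason.size, while the expiry sweep in _prune_cache reports EvictionReason.time. The LruCache change below likewise tags its eviction path with EvictionReason.size.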
synapse/util/caches/lrucache.py
@@ -40,7 +40,7 @@ from twisted.internet.interfaces import IReactorTime
 from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.util import Clock, caches
-from synapse.util.caches import CacheMetric, register_cache
+from synapse.util.caches import CacheMetric, EvictionReason, register_cache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 from synapse.util.linked_list import ListNode

@@ -403,7 +403,7 @@ class LruCache(Generic[KT, VT]):
                 evicted_len = delete_node(node)
                 cache.pop(node.key, None)
                 if metrics:
-                    metrics.inc_evictions(evicted_len)
+                    metrics.inc_evictions(EvictionReason.size, evicted_len)

         def synchronized(f: FT) -> FT:
             @wraps(f)
|
|
Loading…
Reference in a new issue