mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-25 19:15:51 +03:00
Track why we're evicting from caches (#10829)
So we can distinguish between "evicting because the cache is too big" and "evicting because the cache entries haven't been recently used".
This commit is contained in:
parent
51e2db3598
commit
a2d7195e01
4 changed files with 31 additions and 15 deletions
1
changelog.d/10829.misc
Normal file
1
changelog.d/10829.misc
Normal file
|
@ -0,0 +1 @@
|
|||
Track cache eviction rates more finely in Prometheus' monitoring.
|
|
@ -12,8 +12,10 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import typing
|
||||
from enum import Enum, auto
|
||||
from sys import intern
|
||||
from typing import Callable, Dict, Optional, Sized
|
||||
|
||||
|
@ -34,7 +36,7 @@ collectors_by_name: Dict[str, "CacheMetric"] = {}
|
|||
|
||||
cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
|
||||
cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
|
||||
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
|
||||
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"])
|
||||
cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
|
||||
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
|
||||
cache_memory_usage = Gauge(
|
||||
|
@ -46,11 +48,16 @@ cache_memory_usage = Gauge(
|
|||
response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
|
||||
response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
|
||||
response_cache_evicted = Gauge(
|
||||
"synapse_util_caches_response_cache:evicted_size", "", ["name"]
|
||||
"synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"]
|
||||
)
|
||||
response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])
|
||||
|
||||
|
||||
class EvictionReason(Enum):
|
||||
size = auto()
|
||||
time = auto()
|
||||
|
||||
|
||||
@attr.s(slots=True)
|
||||
class CacheMetric:
|
||||
|
||||
|
@ -61,7 +68,9 @@ class CacheMetric:
|
|||
|
||||
hits = attr.ib(default=0)
|
||||
misses = attr.ib(default=0)
|
||||
evicted_size = attr.ib(default=0)
|
||||
eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib(
|
||||
factory=collections.Counter
|
||||
)
|
||||
memory_usage = attr.ib(default=None)
|
||||
|
||||
def inc_hits(self) -> None:
|
||||
|
@ -70,8 +79,8 @@ class CacheMetric:
|
|||
def inc_misses(self) -> None:
|
||||
self.misses += 1
|
||||
|
||||
def inc_evictions(self, size: int = 1) -> None:
|
||||
self.evicted_size += size
|
||||
def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
|
||||
self.eviction_size_by_reason[reason] += size
|
||||
|
||||
def inc_memory_usage(self, memory: int) -> None:
|
||||
if self.memory_usage is None:
|
||||
|
@ -94,14 +103,20 @@ class CacheMetric:
|
|||
if self._cache_type == "response_cache":
|
||||
response_cache_size.labels(self._cache_name).set(len(self._cache))
|
||||
response_cache_hits.labels(self._cache_name).set(self.hits)
|
||||
response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
|
||||
for reason in EvictionReason:
|
||||
response_cache_evicted.labels(self._cache_name, reason.name).set(
|
||||
self.eviction_size_by_reason[reason]
|
||||
)
|
||||
response_cache_total.labels(self._cache_name).set(
|
||||
self.hits + self.misses
|
||||
)
|
||||
else:
|
||||
cache_size.labels(self._cache_name).set(len(self._cache))
|
||||
cache_hits.labels(self._cache_name).set(self.hits)
|
||||
cache_evicted.labels(self._cache_name).set(self.evicted_size)
|
||||
for reason in EvictionReason:
|
||||
cache_evicted.labels(self._cache_name, reason.name).set(
|
||||
self.eviction_size_by_reason[reason]
|
||||
)
|
||||
cache_total.labels(self._cache_name).set(self.hits + self.misses)
|
||||
if getattr(self._cache, "max_size", None):
|
||||
cache_max_size.labels(self._cache_name).set(self._cache.max_size)
|
||||
|
|
|
@ -22,7 +22,7 @@ from typing_extensions import Literal
|
|||
from synapse.config import cache as cache_config
|
||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||
from synapse.util import Clock
|
||||
from synapse.util.caches import register_cache
|
||||
from synapse.util.caches import EvictionReason, register_cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -98,9 +98,9 @@ class ExpiringCache(Generic[KT, VT]):
|
|||
while self._max_size and len(self) > self._max_size:
|
||||
_key, value = self._cache.popitem(last=False)
|
||||
if self.iterable:
|
||||
self.metrics.inc_evictions(len(value.value))
|
||||
self.metrics.inc_evictions(EvictionReason.size, len(value.value))
|
||||
else:
|
||||
self.metrics.inc_evictions()
|
||||
self.metrics.inc_evictions(EvictionReason.size)
|
||||
|
||||
def __getitem__(self, key: KT) -> VT:
|
||||
try:
|
||||
|
@ -175,9 +175,9 @@ class ExpiringCache(Generic[KT, VT]):
|
|||
for k in keys_to_delete:
|
||||
value = self._cache.pop(k)
|
||||
if self.iterable:
|
||||
self.metrics.inc_evictions(len(value.value))
|
||||
self.metrics.inc_evictions(EvictionReason.time, len(value.value))
|
||||
else:
|
||||
self.metrics.inc_evictions()
|
||||
self.metrics.inc_evictions(EvictionReason.time)
|
||||
|
||||
logger.debug(
|
||||
"[%s] _prune_cache before: %d, after len: %d",
|
||||
|
|
|
@ -40,7 +40,7 @@ from twisted.internet.interfaces import IReactorTime
|
|||
from synapse.config import cache as cache_config
|
||||
from synapse.metrics.background_process_metrics import wrap_as_background_process
|
||||
from synapse.util import Clock, caches
|
||||
from synapse.util.caches import CacheMetric, register_cache
|
||||
from synapse.util.caches import CacheMetric, EvictionReason, register_cache
|
||||
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
|
||||
from synapse.util.linked_list import ListNode
|
||||
|
||||
|
@ -403,7 +403,7 @@ class LruCache(Generic[KT, VT]):
|
|||
evicted_len = delete_node(node)
|
||||
cache.pop(node.key, None)
|
||||
if metrics:
|
||||
metrics.inc_evictions(evicted_len)
|
||||
metrics.inc_evictions(EvictionReason.size, evicted_len)
|
||||
|
||||
def synchronized(f: FT) -> FT:
|
||||
@wraps(f)
|
||||
|
|
Loading…
Reference in a new issue