2022-05-31 19:35:29 +03:00
|
|
|
#
|
2023-11-21 23:29:58 +03:00
|
|
|
# This file is licensed under the Affero General Public License (AGPL) version 3.
|
|
|
|
#
|
2024-01-23 14:26:48 +03:00
|
|
|
# Copyright 2022 The Matrix.org Foundation C.I.C.
|
2023-11-21 23:29:58 +03:00
|
|
|
# Copyright (C) 2023 New Vector, Ltd
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as
|
|
|
|
# published by the Free Software Foundation, either version 3 of the
|
|
|
|
# License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# See the GNU Affero General Public License for more details:
|
|
|
|
# <https://www.gnu.org/licenses/agpl-3.0.html>.
|
|
|
|
#
|
|
|
|
# Originally licensed under the Apache License, Version 2.0:
|
|
|
|
# <http://www.apache.org/licenses/LICENSE-2.0>.
|
|
|
|
#
|
|
|
|
# [This file includes modifications made by New Vector Limited]
|
2022-05-31 19:35:29 +03:00
|
|
|
#
|
|
|
|
#
|
|
|
|
|
|
|
|
import io
|
2022-09-15 15:57:16 +03:00
|
|
|
from typing import Iterable, Optional
|
|
|
|
|
|
|
|
from matrix_common.types.mxc_uri import MXCUri
|
2022-05-31 19:35:29 +03:00
|
|
|
|
|
|
|
from twisted.test.proto_helpers import MemoryReactor
|
|
|
|
|
|
|
|
from synapse.rest import admin
|
|
|
|
from synapse.rest.client import login, register, room
|
|
|
|
from synapse.server import HomeServer
|
|
|
|
from synapse.types import UserID
|
|
|
|
from synapse.util import Clock
|
|
|
|
|
|
|
|
from tests import unittest
|
|
|
|
from tests.unittest import override_config
|
|
|
|
from tests.utils import MockClock
|
|
|
|
|
|
|
|
|
|
|
|
class MediaRetentionTestCase(unittest.HomeserverTestCase):
    """Tests for the `media_retention` homeserver config options.

    `prepare` seeds both the local media store and the remote media cache with
    media in a range of states; each test then enables one retention setting,
    advances time and checks which of the seeded media are still stored.
    """

    ONE_DAY_IN_MS = 24 * 60 * 60 * 1000
    THIRTY_DAYS_IN_MS = 30 * ONE_DAY_IN_MS

    servlets = [
        room.register_servlets,
        login.register_servlets,
        register.register_servlets,
        admin.register_servlets_for_client_rest_resource,
    ]

    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
        """Build the test homeserver with a MockClock as its clock."""
        # These tests need to advance time inside the homeserver. The default
        # test clock is therefore swapped for a MockClock, which supports that.
        mock_clock = MockClock()
        return self.setup_test_homeserver(clock=mock_clock)

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Seed the local media store and the remote media cache.

        The MXC URI of every piece of media created here is kept on `self`
        (`self.local_*` / `self.remote_*`) so that the tests can refer to it.
        """
        self.remote_server_name = "remote.homeserver"
        self.store = hs.get_datastores().main

        # All local uploads are attributed to this freshly registered user.
        uploader = UserID.from_string(self.register_user("alice", "password"))

        media_repo = hs.get_media_repository()
        payload = b"example string"

        def _upload_local_media(
            last_accessed_ms: Optional[int],
            is_quarantined: bool = False,
            is_protected: bool = False,
        ) -> MXCUri:
            """Create one piece of local media and apply the given attributes.

            Args:
                last_accessed_ms: Last-access timestamp to record for the
                    media, or None to leave the last-access time unset.
                is_quarantined: Whether to quarantine the media.
                is_protected: Whether to mark the media as safe (protected
                    from quarantine).

            Returns:
                The MXC URI of the newly created media.
            """
            # "Upload" the payload through the media repository.
            upload = media_repo.create_content(
                media_type="text/plain",
                upload_name=None,
                content=io.BytesIO(payload),
                content_length=len(payload),
                auth_user=uploader,
            )
            uri: MXCUri = self.get_success(upload)
            local_media_id = uri.media_id

            if last_accessed_ms is not None:
                # Record when this media was last accessed.
                self.get_success(
                    self.store.update_cached_last_access_time(
                        local_media=(local_media_id,),
                        remote_media=(),
                        time_ms=last_accessed_ms,
                    )
                )

            if is_quarantined:
                self.get_success(
                    self.store.quarantine_media_by_id(
                        server_name=self.hs.config.server.server_name,
                        media_id=local_media_id,
                        quarantined_by="@theadmin:test",
                    )
                )

            if is_protected:
                self.get_success(
                    self.store.mark_local_media_as_safe(
                        media_id=local_media_id,
                        safe=True,
                    )
                )

            return uri

        def _cache_remote_media(
            media_id: str,
            last_accessed_ms: Optional[int],
            is_quarantined: bool = False,
        ) -> MXCUri:
            """Insert one entry into the remote media cache and apply the
            given attributes.

            Args:
                media_id: Media ID to store the entry under; the origin is
                    always `self.remote_server_name`.
                last_accessed_ms: Last-access timestamp to record for the
                    entry, or None to leave the last-access time unset.
                is_quarantined: Whether to quarantine the entry.

            Returns:
                The MXC URI (remote server name plus media ID) of the entry.
            """
            origin = self.remote_server_name

            # Pretend that this media was fetched from the remote and cached.
            self.get_success(
                self.store.store_cached_remote_media(
                    origin=origin,
                    media_id=media_id,
                    media_type="text/plain",
                    media_length=1,
                    time_now_ms=clock.time_msec(),
                    upload_name="testfile.txt",
                    filesystem_id="abcdefg12345",
                )
            )

            if last_accessed_ms is not None:
                # Record when this cache entry was last accessed.
                self.get_success(
                    self.store.update_cached_last_access_time(
                        local_media=(),
                        remote_media=((origin, media_id),),
                        time_ms=last_accessed_ms,
                    )
                )

            if is_quarantined:
                self.get_success(
                    self.store.quarantine_media_by_id(
                        server_name=origin,
                        media_id=media_id,
                        quarantined_by="@theadmin:test",
                    )
                )

            return MXCUri(origin, media_id)

        # Last-access timestamps used for "recently accessed" and "not recently
        # accessed" media respectively.
        recent_ms = self.THIRTY_DAYS_IN_MS
        stale_ms = self.ONE_DAY_IN_MS

        # Local media store: recently accessed, not recently accessed, not
        # recently accessed + quarantined, not recently accessed + protected
        # from quarantine, and never accessed.
        self.local_recently_accessed_media = _upload_local_media(
            last_accessed_ms=recent_ms,
        )
        self.local_not_recently_accessed_media = _upload_local_media(
            last_accessed_ms=stale_ms,
        )
        self.local_not_recently_accessed_quarantined_media = _upload_local_media(
            last_accessed_ms=stale_ms,
            is_quarantined=True,
        )
        self.local_not_recently_accessed_protected_media = _upload_local_media(
            last_accessed_ms=stale_ms,
            is_protected=True,
        )
        self.local_never_accessed_media = _upload_local_media(
            last_accessed_ms=None,
        )

        # Remote media cache: recently accessed, not recently accessed, and
        # not recently accessed + quarantined.
        #
        # There is no "never accessed" remote entry: remote media will always
        # have a "last accessed" attribute, as it would not be fetched from the
        # remote homeserver unless instigated by a user.
        self.remote_recently_accessed_media = _cache_remote_media(
            media_id="a",
            last_accessed_ms=recent_ms,
        )
        self.remote_not_recently_accessed_media = _cache_remote_media(
            media_id="b",
            last_accessed_ms=stale_ms,
        )
        self.remote_not_recently_accessed_quarantined_media = _cache_remote_media(
            media_id="c",
            last_accessed_ms=stale_ms,
            is_quarantined=True,
        )

    @override_config(
        {
            "media_retention": {
                # Only local media is given a lifetime; cached remote media
                # should not be purged.
                "local_media_lifetime": "30d",
            }
        }
    )
    def test_local_media_retention(self) -> None:
        """
        Tests that local media which has not been accessed recently is purged,
        while cached remote media is unaffected.
        """
        # Advance 31 days (the reactor works in seconds).
        self.reactor.advance(31 * self.ONE_DAY_IN_MS // 1000)

        # Local media with a recent last-access time should still exist, as
        # should quarantined and protected local media. Remote media should be
        # unaffected.
        self._assert_if_mxc_uris_purged(
            purged=[
                self.local_not_recently_accessed_media,
                self.local_never_accessed_media,
            ],
            not_purged=[
                self.local_recently_accessed_media,
                self.local_not_recently_accessed_quarantined_media,
                self.local_not_recently_accessed_protected_media,
                self.remote_recently_accessed_media,
                self.remote_not_recently_accessed_media,
                self.remote_not_recently_accessed_quarantined_media,
            ],
        )

    @override_config(
        {
            "media_retention": {
                # Only cached remote media is given a lifetime; local media
                # should not be purged.
                "remote_media_lifetime": "30d",
            }
        }
    )
    def test_remote_media_cache_retention(self) -> None:
        """
        Tests that entries in the remote media cache which have not been
        accessed recently are purged, while local media is unaffected.
        """
        # Advance 31 days (the reactor works in seconds).
        self.reactor.advance(31 * self.ONE_DAY_IN_MS // 1000)

        # Remote media with a recent last-access time should still exist, as
        # should quarantined remote media. Local media should be unaffected.
        self._assert_if_mxc_uris_purged(
            purged=[
                self.remote_not_recently_accessed_media,
            ],
            not_purged=[
                self.remote_recently_accessed_media,
                self.local_recently_accessed_media,
                self.local_not_recently_accessed_media,
                self.local_not_recently_accessed_quarantined_media,
                self.local_not_recently_accessed_protected_media,
                self.remote_not_recently_accessed_quarantined_media,
                self.local_never_accessed_media,
            ],
        )

    def _assert_if_mxc_uris_purged(
        self, purged: Iterable[MXCUri], not_purged: Iterable[MXCUri]
    ) -> None:
        """Assert that the given MXC URIs have, or have not, been purged.

        Args:
            purged: MXC URIs whose media must no longer be found in the store.
            not_purged: MXC URIs whose media must still be found in the store.
        """

        def _is_stored(uri: MXCUri) -> bool:
            """Report whether the store still returns a record for `uri`."""
            # URIs on this homeserver's own server name are looked up as local
            # media; anything else is looked up in the remote media cache.
            if uri.server_name == self.hs.config.server.server_name:
                record: object = self.get_success(
                    self.store.get_local_media(uri.media_id)
                )
            else:
                record = self.get_success(
                    self.store.get_cached_remote_media(
                        uri.server_name, uri.media_id
                    )
                )
            return bool(record)

        for uri in purged:
            self.assertFalse(
                _is_stored(uri),
                msg=f"{uri} unexpectedly not purged",
            )

        for uri in not_purged:
            self.assertTrue(
                _is_stored(uri),
                msg=f"{uri} unexpectedly purged",
            )
|