Support MSC3916 by adding unstable media endpoints to _matrix/client (#17213)
Some checks failed
Build docker images / build (push) Has been cancelled
Deploy the documentation / Calculate variables for GitHub Pages deployment (push) Has been cancelled
Build release artifacts / Calculate list of debian distros (push) Has been cancelled
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (aarch64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Has been cancelled
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, macos-11) (push) Has been cancelled
Build release artifacts / Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} (x86_64, ${{ startsWith(github.ref, 'refs/pull/') }}, ubuntu-20.04) (push) Has been cancelled
Build release artifacts / Build sdist (push) Has been cancelled
Tests / check-lockfile (push) Has been cancelled
Tests / changes (push) Has been cancelled
Tests / lint-crlf (push) Has been cancelled
Tests / lint-newsfile (push) Has been cancelled
Deploy the documentation / GitHub Pages (push) Has been cancelled
Build release artifacts / Build .deb packages (push) Has been cancelled
Build release artifacts / Attach assets to release (push) Has been cancelled
Tests / check-sampleconfig (push) Has been cancelled
Tests / check-schema-delta (push) Has been cancelled
Tests / lint (push) Has been cancelled
Tests / Typechecking (push) Has been cancelled
Tests / lint-pydantic (push) Has been cancelled
Tests / lint-clippy (push) Has been cancelled
Tests / lint-clippy-nightly (push) Has been cancelled
Tests / lint-rustfmt (push) Has been cancelled
Tests / linting-done (push) Has been cancelled
Tests / calculate-test-jobs (push) Has been cancelled
Tests / trial (push) Has been cancelled
Tests / trial-olddeps (push) Has been cancelled
Tests / trial-pypy (all, pypy-3.8) (push) Has been cancelled
Tests / sytest (push) Has been cancelled
Tests / export-data (push) Has been cancelled
Tests / portdb (11, 3.8) (push) Has been cancelled
Tests / portdb (15, 3.11) (push) Has been cancelled
Tests / complement (monolith, Postgres) (push) Has been cancelled
Tests / complement (monolith, SQLite) (push) Has been cancelled
Tests / complement (workers, Postgres) (push) Has been cancelled
Tests / cargo-test (push) Has been cancelled
Tests / cargo-bench (push) Has been cancelled
Tests / tests-done (push) Has been cancelled

[MSC3916](https://github.com/matrix-org/matrix-spec-proposals/blob/rav/authentication-for-media/proposals/3916-authentication-for-media.md)
adds new media endpoints under `_matrix/client`. This PR adds the
`/preview_url`, `/config`, and `/thumbnail` endpoints. `/download` will
be added in a follow-up PR once the work for the federation `/download`
endpoint is complete (see
https://github.com/element-hq/synapse/pull/17172).

Should be reviewable commit-by-commit.
This commit is contained in:
Shay 2024-05-24 01:47:37 -07:00 committed by GitHub
parent c97251d5ba
commit 9edb725ebc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 2393 additions and 549 deletions

View file

@ -0,0 +1 @@
Support MSC3916 by adding unstable media endpoints to `_matrix/client` (#17213).

View file

@ -439,3 +439,7 @@ class ExperimentalConfig(Config):
self.msc4115_membership_on_events = experimental.get(
"msc4115_membership_on_events", False
)
self.msc3916_authenticated_media_enabled = experimental.get(
"msc3916_authenticated_media_enabled", False
)

View file

@ -22,11 +22,27 @@
import logging
from io import BytesIO
from types import TracebackType
from typing import Optional, Tuple, Type
from typing import TYPE_CHECKING, List, Optional, Tuple, Type
from PIL import Image
from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
from synapse.http.server import respond_with_json
from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import trace
from synapse.media._base import (
FileInfo,
ThumbnailInfo,
respond_404,
respond_with_file,
respond_with_responder,
)
from synapse.media.media_storage import MediaStorage
if TYPE_CHECKING:
from synapse.media.media_repository import MediaRepository
from synapse.server import HomeServer
logger = logging.getLogger(__name__)
@ -231,3 +247,471 @@ class Thumbnailer:
def __del__(self) -> None:
# Make sure we actually do close the image, rather than leak data.
self.close()
class ThumbnailProvider:
def __init__(
self,
hs: "HomeServer",
media_repo: "MediaRepository",
media_storage: MediaStorage,
):
self.hs = hs
self.media_repo = media_repo
self.media_storage = media_storage
self.store = hs.get_datastores().main
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
async def respond_local_thumbnail(
self,
request: SynapseRequest,
media_id: str,
width: int,
height: int,
method: str,
m_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail(
request,
width,
height,
method,
m_type,
thumbnail_infos,
media_id,
media_id,
url_cache=bool(media_info.url_cache),
server_name=None,
)
async def select_or_generate_local_thumbnail(
self,
request: SynapseRequest,
media_id: str,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type
if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=None,
file_id=media_id,
url_cache=bool(media_info.url_cache),
thumbnail=info,
)
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request, responder, info.type, info.length
)
return
logger.debug("We don't have a thumbnail of that size. Generating")
# Okay, so we generate one.
file_path = await self.media_repo.generate_local_exact_thumbnail(
media_id,
desired_width,
desired_height,
desired_method,
desired_type,
url_cache=bool(media_info.url_cache),
)
if file_path:
await respond_with_file(request, desired_type, file_path)
else:
logger.warning("Failed to generate thumbnail")
raise SynapseError(400, "Failed to generate thumbnail.")
async def select_or_generate_remote_thumbnail(
self,
request: SynapseRequest,
server_name: str,
media_id: str,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms
)
if not media_info:
respond_404(request)
return
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
file_id = media_info.filesystem_id
for info in thumbnail_infos:
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type
if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
thumbnail=info,
)
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request, responder, info.type, info.length
)
return
logger.debug("We don't have a thumbnail of that size. Generating")
# Okay, so we generate one.
file_path = await self.media_repo.generate_remote_exact_thumbnail(
server_name,
file_id,
media_id,
desired_width,
desired_height,
desired_method,
desired_type,
)
if file_path:
await respond_with_file(request, desired_type, file_path)
else:
logger.warning("Failed to generate thumbnail")
raise SynapseError(400, "Failed to generate thumbnail.")
async def respond_remote_thumbnail(
self,
request: SynapseRequest,
server_name: str,
media_id: str,
width: int,
height: int,
method: str,
m_type: str,
max_timeout_ms: int,
) -> None:
# TODO: Don't download the whole remote file
# We should proxy the thumbnail from the remote server instead of
# downloading the remote file and generating our own thumbnails.
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
await self._select_and_respond_with_thumbnail(
request,
width,
height,
method,
m_type,
thumbnail_infos,
media_id,
media_info.filesystem_id,
url_cache=False,
server_name=server_name,
)
async def _select_and_respond_with_thumbnail(
self,
request: SynapseRequest,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[ThumbnailInfo],
media_id: str,
file_id: str,
url_cache: bool,
server_name: Optional[str] = None,
) -> None:
"""
Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
Args:
request: The incoming request.
desired_width: The desired width, the returned thumbnail may be larger than this.
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of thumbnail info of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
"""
logger.debug(
"_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
media_id,
desired_width,
desired_height,
desired_method,
thumbnail_infos,
)
# If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
# different code path to handle it.
assert not self.dynamic_thumbnails
if thumbnail_infos:
file_info = self._select_thumbnail(
desired_width,
desired_height,
desired_method,
desired_type,
thumbnail_infos,
file_id,
url_cache,
server_name,
)
if not file_info:
logger.info("Couldn't find a thumbnail matching the desired inputs")
respond_404(request)
return
# The thumbnail property must exist.
assert file_info.thumbnail is not None
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request,
responder,
file_info.thumbnail.type,
file_info.thumbnail.length,
)
return
# If we can't find the thumbnail we regenerate it. This can happen
# if e.g. we've deleted the thumbnails but still have the original
# image somewhere.
#
# Since we have an entry for the thumbnail in the DB we a) know we
# have have successfully generated the thumbnail in the past (so we
# don't need to worry about repeatedly failing to generate
# thumbnails), and b) have already calculated that appropriate
# width/height/method so we can just call the "generate exact"
# methods.
# First let's check that we do actually have the original image
# still. This will throw a 404 if we don't.
# TODO: We should refetch the thumbnails for remote media.
await self.media_storage.ensure_media_is_in_local_cache(
FileInfo(server_name, file_id, url_cache=url_cache)
)
if server_name:
await self.media_repo.generate_remote_exact_thumbnail(
server_name,
file_id=file_id,
media_id=media_id,
t_width=file_info.thumbnail.width,
t_height=file_info.thumbnail.height,
t_method=file_info.thumbnail.method,
t_type=file_info.thumbnail.type,
)
else:
await self.media_repo.generate_local_exact_thumbnail(
media_id=media_id,
t_width=file_info.thumbnail.width,
t_height=file_info.thumbnail.height,
t_method=file_info.thumbnail.method,
t_type=file_info.thumbnail.type,
url_cache=url_cache,
)
responder = await self.media_storage.fetch_media(file_info)
await respond_with_responder(
request,
responder,
file_info.thumbnail.type,
file_info.thumbnail.length,
)
else:
# This might be because:
# 1. We can't create thumbnails for the given media (corrupted or
# unsupported file type), or
# 2. The thumbnailing process never ran or errored out initially
# when the media was first uploaded (these bugs should be
# reported and fixed).
# Note that we don't attempt to generate a thumbnail now because
# `dynamic_thumbnails` is disabled.
logger.info("Failed to find any generated thumbnails")
assert request.path is not None
respond_with_json(
request,
400,
cs_error(
"Cannot find any thumbnails for the requested media ('%s'). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
% (
request.path.decode(),
", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
),
code=Codes.UNKNOWN,
),
send_cors=True,
)
def _select_thumbnail(
self,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[ThumbnailInfo],
file_id: str,
url_cache: bool,
server_name: Optional[str],
) -> Optional[FileInfo]:
"""
Choose an appropriate thumbnail from the previously generated thumbnails.
Args:
desired_width: The desired width, the returned thumbnail may be larger than this.
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
Returns:
The thumbnail which best matches the desired parameters.
"""
desired_method = desired_method.lower()
# The chosen thumbnail.
thumbnail_info = None
d_w = desired_width
d_h = desired_height
if desired_method == "crop":
# Thumbnails that match equal or larger sizes of desired width/height.
crop_info_list: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
# Other thumbnails.
crop_info_list2: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info.method != "crop":
continue
t_w = info.width
t_h = info.height
aspect_quality = abs(d_w * t_h - d_h * t_w)
min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info.type
length_quality = info.length
if t_w >= d_w or t_h >= d_h:
crop_info_list.append(
(
aspect_quality,
min_quality,
size_quality,
type_quality,
length_quality,
info,
)
)
else:
crop_info_list2.append(
(
aspect_quality,
min_quality,
size_quality,
type_quality,
length_quality,
info,
)
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if crop_info_list:
thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
elif crop_info_list2:
thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
elif desired_method == "scale":
# Thumbnails that match equal or larger sizes of desired width/height.
info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = []
# Other thumbnails.
info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = []
for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info.method != "scale":
continue
t_w = info.width
t_h = info.height
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info.type
length_quality = info.length
if t_w >= d_w or t_h >= d_h:
info_list.append((size_quality, type_quality, length_quality, info))
else:
info_list2.append(
(size_quality, type_quality, length_quality, info)
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if info_list:
thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
elif info_list2:
thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
if thumbnail_info:
return FileInfo(
file_id=file_id,
url_cache=url_cache,
server_name=server_name,
thumbnail=thumbnail_info,
)
# No matching thumbnail was found.
return None

View file

@ -0,0 +1,205 @@
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2020 The Matrix.org Foundation C.I.C.
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright (C) 2024 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
import re
from synapse.http.server import (
HttpServer,
respond_with_json,
respond_with_json_bytes,
set_corp_headers,
set_cors_headers,
)
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.media._base import (
DEFAULT_MAX_TIMEOUT_MS,
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS,
respond_404,
)
from synapse.media.media_repository import MediaRepository
from synapse.media.media_storage import MediaStorage
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.server import HomeServer
from synapse.util.stringutils import parse_and_validate_server_name
logger = logging.getLogger(__name__)
class UnstablePreviewURLServlet(RestServlet):
"""
Same as `GET /_matrix/media/r0/preview_url`, this endpoint provides a generic preview API
for URLs which outputs Open Graph (https://ogp.me/) responses (with some Matrix
specific additions).
This does have trade-offs compared to other designs:
* Pros:
* Simple and flexible; can be used by any clients at any point
* Cons:
* If each homeserver provides one of these independently, all the homeservers in a
room may needlessly DoS the target URI
* The URL metadata must be stored somewhere, rather than just using Matrix
itself to store the media.
* Matrix cannot be used to distribute the metadata between homeservers.
"""
PATTERNS = [
re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/preview_url$")
]
def __init__(
self,
hs: "HomeServer",
media_repo: "MediaRepository",
media_storage: MediaStorage,
):
super().__init__()
self.auth = hs.get_auth()
self.clock = hs.get_clock()
self.media_repo = media_repo
self.media_storage = media_storage
assert self.media_repo.url_previewer is not None
self.url_previewer = self.media_repo.url_previewer
async def on_GET(self, request: SynapseRequest) -> None:
requester = await self.auth.get_user_by_req(request)
url = parse_string(request, "url", required=True)
ts = parse_integer(request, "ts")
if ts is None:
ts = self.clock.time_msec()
og = await self.url_previewer.preview(url, requester.user, ts)
respond_with_json_bytes(request, 200, og, send_cors=True)
class UnstableMediaConfigResource(RestServlet):
PATTERNS = [
re.compile(r"^/_matrix/client/unstable/org.matrix.msc3916/media/config$")
]
def __init__(self, hs: "HomeServer"):
super().__init__()
config = hs.config
self.clock = hs.get_clock()
self.auth = hs.get_auth()
self.limits_dict = {"m.upload.size": config.media.max_upload_size}
async def on_GET(self, request: SynapseRequest) -> None:
await self.auth.get_user_by_req(request)
respond_with_json(request, 200, self.limits_dict, send_cors=True)
class UnstableThumbnailResource(RestServlet):
PATTERNS = [
re.compile(
"/_matrix/client/unstable/org.matrix.msc3916/media/thumbnail/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$"
)
]
def __init__(
self,
hs: "HomeServer",
media_repo: "MediaRepository",
media_storage: MediaStorage,
):
super().__init__()
self.store = hs.get_datastores().main
self.media_repo = media_repo
self.media_storage = media_storage
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self._is_mine_server_name = hs.is_mine_server_name
self._server_name = hs.hostname
self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
self.thumbnailer = ThumbnailProvider(hs, media_repo, media_storage)
self.auth = hs.get_auth()
async def on_GET(
self, request: SynapseRequest, server_name: str, media_id: str
) -> None:
# Validate the server name, raising if invalid
parse_and_validate_server_name(server_name)
await self.auth.get_user_by_req(request)
set_cors_headers(request)
set_corp_headers(request)
width = parse_integer(request, "width", required=True)
height = parse_integer(request, "height", required=True)
method = parse_string(request, "method", "scale")
# TODO Parse the Accept header to get an prioritised list of thumbnail types.
m_type = "image/png"
max_timeout_ms = parse_integer(
request, "timeout_ms", default=DEFAULT_MAX_TIMEOUT_MS
)
max_timeout_ms = min(max_timeout_ms, MAXIMUM_ALLOWED_MAX_TIMEOUT_MS)
if self._is_mine_server_name(server_name):
if self.dynamic_thumbnails:
await self.thumbnailer.select_or_generate_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
else:
await self.thumbnailer.respond_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
self.media_repo.mark_recently_accessed(None, media_id)
else:
# Don't let users download media from configured domains, even if it
# is already downloaded. This is Trust & Safety tooling to make some
# media inaccessible to local users.
# See `prevent_media_downloads_from` config docs for more info.
if server_name in self.prevent_media_downloads_from:
respond_404(request)
return
remote_resp_function = (
self.thumbnailer.select_or_generate_remote_thumbnail
if self.dynamic_thumbnails
else self.thumbnailer.respond_remote_thumbnail
)
await remote_resp_function(
request,
server_name,
media_id,
width,
height,
method,
m_type,
max_timeout_ms,
)
self.media_repo.mark_recently_accessed(server_name, media_id)
def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None:
if hs.config.experimental.msc3916_authenticated_media_enabled:
media_repo = hs.get_media_repository()
if hs.config.media.url_preview_enabled:
UnstablePreviewURLServlet(
hs, media_repo, media_repo.media_storage
).register(http_server)
UnstableMediaConfigResource(hs).register(http_server)
UnstableThumbnailResource(hs, media_repo, media_repo.media_storage).register(
http_server
)

View file

@ -22,23 +22,18 @@
import logging
import re
from typing import TYPE_CHECKING, List, Optional, Tuple
from typing import TYPE_CHECKING
from synapse.api.errors import Codes, SynapseError, cs_error
from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP
from synapse.http.server import respond_with_json, set_corp_headers, set_cors_headers
from synapse.http.server import set_corp_headers, set_cors_headers
from synapse.http.servlet import RestServlet, parse_integer, parse_string
from synapse.http.site import SynapseRequest
from synapse.media._base import (
DEFAULT_MAX_TIMEOUT_MS,
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS,
FileInfo,
ThumbnailInfo,
respond_404,
respond_with_file,
respond_with_responder,
)
from synapse.media.media_storage import MediaStorage
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.util.stringutils import parse_and_validate_server_name
if TYPE_CHECKING:
@ -66,10 +61,11 @@ class ThumbnailResource(RestServlet):
self.store = hs.get_datastores().main
self.media_repo = media_repo
self.media_storage = media_storage
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self._is_mine_server_name = hs.is_mine_server_name
self._server_name = hs.hostname
self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self.thumbnail_provider = ThumbnailProvider(hs, media_repo, media_storage)
async def on_GET(
self, request: SynapseRequest, server_name: str, media_id: str
@ -91,11 +87,11 @@ class ThumbnailResource(RestServlet):
if self._is_mine_server_name(server_name):
if self.dynamic_thumbnails:
await self._select_or_generate_local_thumbnail(
await self.thumbnail_provider.select_or_generate_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
else:
await self._respond_local_thumbnail(
await self.thumbnail_provider.respond_local_thumbnail(
request, media_id, width, height, method, m_type, max_timeout_ms
)
self.media_repo.mark_recently_accessed(None, media_id)
@ -109,9 +105,9 @@ class ThumbnailResource(RestServlet):
return
remote_resp_function = (
self._select_or_generate_remote_thumbnail
self.thumbnail_provider.select_or_generate_remote_thumbnail
if self.dynamic_thumbnails
else self._respond_remote_thumbnail
else self.thumbnail_provider.respond_remote_thumbnail
)
await remote_resp_function(
request,
@ -124,457 +120,3 @@ class ThumbnailResource(RestServlet):
max_timeout_ms,
)
self.media_repo.mark_recently_accessed(server_name, media_id)
async def _respond_local_thumbnail(
self,
request: SynapseRequest,
media_id: str,
width: int,
height: int,
method: str,
m_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail(
request,
width,
height,
method,
m_type,
thumbnail_infos,
media_id,
media_id,
url_cache=bool(media_info.url_cache),
server_name=None,
)
async def _select_or_generate_local_thumbnail(
self,
request: SynapseRequest,
media_id: str,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_local_media_info(
request, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type
if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=None,
file_id=media_id,
url_cache=bool(media_info.url_cache),
thumbnail=info,
)
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request, responder, info.type, info.length
)
return
logger.debug("We don't have a thumbnail of that size. Generating")
# Okay, so we generate one.
file_path = await self.media_repo.generate_local_exact_thumbnail(
media_id,
desired_width,
desired_height,
desired_method,
desired_type,
url_cache=bool(media_info.url_cache),
)
if file_path:
await respond_with_file(request, desired_type, file_path)
else:
logger.warning("Failed to generate thumbnail")
raise SynapseError(400, "Failed to generate thumbnail.")
async def _select_or_generate_remote_thumbnail(
self,
request: SynapseRequest,
server_name: str,
media_id: str,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
max_timeout_ms: int,
) -> None:
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms
)
if not media_info:
respond_404(request)
return
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
file_id = media_info.filesystem_id
for info in thumbnail_infos:
t_w = info.width == desired_width
t_h = info.height == desired_height
t_method = info.method == desired_method
t_type = info.type == desired_type
if t_w and t_h and t_method and t_type:
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
thumbnail=info,
)
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request, responder, info.type, info.length
)
return
logger.debug("We don't have a thumbnail of that size. Generating")
# Okay, so we generate one.
file_path = await self.media_repo.generate_remote_exact_thumbnail(
server_name,
file_id,
media_id,
desired_width,
desired_height,
desired_method,
desired_type,
)
if file_path:
await respond_with_file(request, desired_type, file_path)
else:
logger.warning("Failed to generate thumbnail")
raise SynapseError(400, "Failed to generate thumbnail.")
async def _respond_remote_thumbnail(
self,
request: SynapseRequest,
server_name: str,
media_id: str,
width: int,
height: int,
method: str,
m_type: str,
max_timeout_ms: int,
) -> None:
# TODO: Don't download the whole remote file
# We should proxy the thumbnail from the remote server instead of
# downloading the remote file and generating our own thumbnails.
media_info = await self.media_repo.get_remote_media_info(
server_name, media_id, max_timeout_ms
)
if not media_info:
return
thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
await self._select_and_respond_with_thumbnail(
request,
width,
height,
method,
m_type,
thumbnail_infos,
media_id,
media_info.filesystem_id,
url_cache=False,
server_name=server_name,
)
async def _select_and_respond_with_thumbnail(
self,
request: SynapseRequest,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[ThumbnailInfo],
media_id: str,
file_id: str,
url_cache: bool,
server_name: Optional[str] = None,
) -> None:
"""
Respond to a request with an appropriate thumbnail from the previously generated thumbnails.
Args:
request: The incoming request.
desired_width: The desired width, the returned thumbnail may be larger than this.
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of thumbnail info of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
"""
logger.debug(
"_select_and_respond_with_thumbnail: media_id=%s desired=%sx%s (%s) thumbnail_infos=%s",
media_id,
desired_width,
desired_height,
desired_method,
thumbnail_infos,
)
# If `dynamic_thumbnails` is enabled, we expect Synapse to go down a
# different code path to handle it.
assert not self.dynamic_thumbnails
if thumbnail_infos:
file_info = self._select_thumbnail(
desired_width,
desired_height,
desired_method,
desired_type,
thumbnail_infos,
file_id,
url_cache,
server_name,
)
if not file_info:
logger.info("Couldn't find a thumbnail matching the desired inputs")
respond_404(request)
return
# The thumbnail property must exist.
assert file_info.thumbnail is not None
responder = await self.media_storage.fetch_media(file_info)
if responder:
await respond_with_responder(
request,
responder,
file_info.thumbnail.type,
file_info.thumbnail.length,
)
return
# If we can't find the thumbnail we regenerate it. This can happen
# if e.g. we've deleted the thumbnails but still have the original
# image somewhere.
#
# Since we have an entry for the thumbnail in the DB we a) know we
# have have successfully generated the thumbnail in the past (so we
# don't need to worry about repeatedly failing to generate
# thumbnails), and b) have already calculated that appropriate
# width/height/method so we can just call the "generate exact"
# methods.
# First let's check that we do actually have the original image
# still. This will throw a 404 if we don't.
# TODO: We should refetch the thumbnails for remote media.
await self.media_storage.ensure_media_is_in_local_cache(
FileInfo(server_name, file_id, url_cache=url_cache)
)
if server_name:
await self.media_repo.generate_remote_exact_thumbnail(
server_name,
file_id=file_id,
media_id=media_id,
t_width=file_info.thumbnail.width,
t_height=file_info.thumbnail.height,
t_method=file_info.thumbnail.method,
t_type=file_info.thumbnail.type,
)
else:
await self.media_repo.generate_local_exact_thumbnail(
media_id=media_id,
t_width=file_info.thumbnail.width,
t_height=file_info.thumbnail.height,
t_method=file_info.thumbnail.method,
t_type=file_info.thumbnail.type,
url_cache=url_cache,
)
responder = await self.media_storage.fetch_media(file_info)
await respond_with_responder(
request,
responder,
file_info.thumbnail.type,
file_info.thumbnail.length,
)
else:
# This might be because:
# 1. We can't create thumbnails for the given media (corrupted or
# unsupported file type), or
# 2. The thumbnailing process never ran or errored out initially
# when the media was first uploaded (these bugs should be
# reported and fixed).
# Note that we don't attempt to generate a thumbnail now because
# `dynamic_thumbnails` is disabled.
logger.info("Failed to find any generated thumbnails")
assert request.path is not None
respond_with_json(
request,
400,
cs_error(
"Cannot find any thumbnails for the requested media ('%s'). This might mean the media is not a supported_media_format=(%s) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)"
% (
request.path.decode(),
", ".join(THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP.keys()),
),
code=Codes.UNKNOWN,
),
send_cors=True,
)
def _select_thumbnail(
self,
desired_width: int,
desired_height: int,
desired_method: str,
desired_type: str,
thumbnail_infos: List[ThumbnailInfo],
file_id: str,
url_cache: bool,
server_name: Optional[str],
) -> Optional[FileInfo]:
"""
Choose an appropriate thumbnail from the previously generated thumbnails.
Args:
desired_width: The desired width, the returned thumbnail may be larger than this.
desired_height: The desired height, the returned thumbnail may be larger than this.
desired_method: The desired method used to generate the thumbnail.
desired_type: The desired content-type of the thumbnail.
thumbnail_infos: A list of thumbnail infos of candidate thumbnails.
file_id: The ID of the media that a thumbnail is being requested for.
url_cache: True if this is from a URL cache.
server_name: The server name, if this is a remote thumbnail.
Returns:
The thumbnail which best matches the desired parameters.
"""
desired_method = desired_method.lower()
# The chosen thumbnail.
thumbnail_info = None
d_w = desired_width
d_h = desired_height
if desired_method == "crop":
# Thumbnails that match equal or larger sizes of desired width/height.
crop_info_list: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
# Other thumbnails.
crop_info_list2: List[
Tuple[int, int, int, bool, Optional[int], ThumbnailInfo]
] = []
for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info.method != "crop":
continue
t_w = info.width
t_h = info.height
aspect_quality = abs(d_w * t_h - d_h * t_w)
min_quality = 0 if d_w <= t_w and d_h <= t_h else 1
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info.type
length_quality = info.length
if t_w >= d_w or t_h >= d_h:
crop_info_list.append(
(
aspect_quality,
min_quality,
size_quality,
type_quality,
length_quality,
info,
)
)
else:
crop_info_list2.append(
(
aspect_quality,
min_quality,
size_quality,
type_quality,
length_quality,
info,
)
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if crop_info_list:
thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1]
elif crop_info_list2:
thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1]
elif desired_method == "scale":
# Thumbnails that match equal or larger sizes of desired width/height.
info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = []
# Other thumbnails.
info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = []
for info in thumbnail_infos:
# Skip thumbnails generated with different methods.
if info.method != "scale":
continue
t_w = info.width
t_h = info.height
size_quality = abs((d_w - t_w) * (d_h - t_h))
type_quality = desired_type != info.type
length_quality = info.length
if t_w >= d_w or t_h >= d_h:
info_list.append((size_quality, type_quality, length_quality, info))
else:
info_list2.append(
(size_quality, type_quality, length_quality, info)
)
# Pick the most appropriate thumbnail. Some values of `desired_width` and
# `desired_height` may result in a tie, in which case we avoid comparing on
# the thumbnail info and pick the thumbnail that appears earlier
# in the list of candidates.
if info_list:
thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1]
elif info_list2:
thumbnail_info = min(info_list2, key=lambda t: t[:-1])[-1]
if thumbnail_info:
return FileInfo(
file_id=file_id,
url_cache=url_cache,
server_name=server_name,
thumbnail=thumbnail_info,
)
# No matching thumbnail was found.
return None

View file

@ -18,6 +18,7 @@
# [This file includes modifications made by New Vector Limited]
#
#
import itertools
import os
import shutil
import tempfile
@ -46,11 +47,11 @@ from synapse.media._base import FileInfo, ThumbnailInfo
from synapse.media.filepath import MediaFilePaths
from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
from synapse.media.storage_provider import FileStorageProviderBackend
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.module_api import ModuleApi
from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
from synapse.rest import admin
from synapse.rest.client import login
from synapse.rest.media.thumbnail_resource import ThumbnailResource
from synapse.rest.client import login, media
from synapse.server import HomeServer
from synapse.types import JsonDict, RoomAlias
from synapse.util import Clock
@ -153,68 +154,54 @@ class _TestImage:
is_inline: bool = True
@parameterized_class(
("test_image",),
[
# small png
(
_TestImage(
SMALL_PNG,
b"image/png",
b".png",
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000020000000200806"
b"000000737a7af40000001a49444154789cedc101010000008220"
b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
b"44ae426082"
),
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000001000000010806"
b"0000001f15c4890000000d49444154789c636060606000000005"
b"0001a5f645400000000049454e44ae426082"
),
),
),
# small png with transparency.
(
_TestImage(
unhexlify(
b"89504e470d0a1a0a0000000d49484452000000010000000101000"
b"00000376ef9240000000274524e5300010194fdae0000000a4944"
b"4154789c636800000082008177cd72b60000000049454e44ae426"
b"082"
),
b"image/png",
b".png",
# Note that we don't check the output since it varies across
# different versions of Pillow.
),
),
# small lossless webp
(
_TestImage(
unhexlify(
b"524946461a000000574542505650384c0d0000002f0000001007"
b"1011118888fe0700"
),
b"image/webp",
b".webp",
),
),
# an empty file
(
_TestImage(
b"",
b"image/gif",
b".gif",
expected_found=False,
unable_to_thumbnail=True,
),
),
# An SVG.
(
_TestImage(
b"""<?xml version="1.0"?>
small_png = _TestImage(
SMALL_PNG,
b"image/png",
b".png",
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000020000000200806"
b"000000737a7af40000001a49444154789cedc101010000008220"
b"ffaf6e484001000000ef0610200001194334ee0000000049454e"
b"44ae426082"
),
unhexlify(
b"89504e470d0a1a0a0000000d4948445200000001000000010806"
b"0000001f15c4890000000d49444154789c636060606000000005"
b"0001a5f645400000000049454e44ae426082"
),
)
small_png_with_transparency = _TestImage(
unhexlify(
b"89504e470d0a1a0a0000000d49484452000000010000000101000"
b"00000376ef9240000000274524e5300010194fdae0000000a4944"
b"4154789c636800000082008177cd72b60000000049454e44ae426"
b"082"
),
b"image/png",
b".png",
# Note that we don't check the output since it varies across
# different versions of Pillow.
)
small_lossless_webp = _TestImage(
unhexlify(
b"524946461a000000574542505650384c0d0000002f0000001007" b"1011118888fe0700"
),
b"image/webp",
b".webp",
)
empty_file = _TestImage(
b"",
b"image/gif",
b".gif",
expected_found=False,
unable_to_thumbnail=True,
)
SVG = _TestImage(
b"""<?xml version="1.0"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
@ -223,19 +210,32 @@ class _TestImage:
<circle cx="100" cy="100" r="50" stroke="black"
stroke-width="5" fill="red" />
</svg>""",
b"image/svg",
b".svg",
expected_found=False,
unable_to_thumbnail=True,
is_inline=False,
),
),
],
b"image/svg",
b".svg",
expected_found=False,
unable_to_thumbnail=True,
is_inline=False,
)
test_images = [
small_png,
small_png_with_transparency,
small_lossless_webp,
empty_file,
SVG,
]
urls = [
"_matrix/media/r0/thumbnail",
"_matrix/client/unstable/org.matrix.msc3916/media/thumbnail",
]
@parameterized_class(("test_image", "url"), itertools.product(test_images, urls))
class MediaRepoTests(unittest.HomeserverTestCase):
servlets = [media.register_servlets]
test_image: ClassVar[_TestImage]
hijack_auth = True
user_id = "@test:user"
url: ClassVar[str]
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
self.fetches: List[
@ -298,6 +298,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
"config": {"directory": self.storage_path},
}
config["media_storage_providers"] = [provider_config]
config["experimental_features"] = {"msc3916_authenticated_media_enabled": True}
hs = self.setup_test_homeserver(config=config, federation_http_client=client)
@ -502,7 +503,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
params = "?width=32&height=32&method=scale"
channel = self.make_request(
"GET",
f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@ -530,7 +531,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel = self.make_request(
"GET",
f"/_matrix/media/v3/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
@ -566,12 +567,11 @@ class MediaRepoTests(unittest.HomeserverTestCase):
params = "?width=32&height=32&method=" + method
channel = self.make_request(
"GET",
f"/_matrix/media/r0/thumbnail/{self.media_id}{params}",
f"/{self.url}/{self.media_id}{params}",
shorthand=False,
await_result=False,
)
self.pump()
headers = {
b"Content-Length": [b"%d" % (len(self.test_image.data))],
b"Content-Type": [self.test_image.content_type],
@ -580,7 +580,6 @@ class MediaRepoTests(unittest.HomeserverTestCase):
(self.test_image.data, (len(self.test_image.data), headers))
)
self.pump()
if expected_found:
self.assertEqual(channel.code, 200)
@ -603,7 +602,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel.json_body,
{
"errcode": "M_UNKNOWN",
"error": "Cannot find any thumbnails for the requested media ('/_matrix/media/r0/thumbnail/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
"error": f"Cannot find any thumbnails for the requested media ('/{self.url}/example.com/12345'). This might mean the media is not a supported_media_format=(image/jpeg, image/jpg, image/webp, image/gif, image/png) or that thumbnailing failed for some other reason. (Dynamic thumbnails are disabled on this server.)",
},
)
else:
@ -613,7 +612,7 @@ class MediaRepoTests(unittest.HomeserverTestCase):
channel.json_body,
{
"errcode": "M_NOT_FOUND",
"error": "Not found '/_matrix/media/r0/thumbnail/example.com/12345'",
"error": f"Not found '/{self.url}/example.com/12345'",
},
)
@ -625,12 +624,12 @@ class MediaRepoTests(unittest.HomeserverTestCase):
content_type = self.test_image.content_type.decode()
media_repo = self.hs.get_media_repository()
thumbnail_resouce = ThumbnailResource(
thumbnail_provider = ThumbnailProvider(
self.hs, media_repo, media_repo.media_storage
)
self.assertIsNotNone(
thumbnail_resouce._select_thumbnail(
thumbnail_provider._select_thumbnail(
desired_width=desired_size,
desired_height=desired_size,
desired_method=method,

File diff suppressed because it is too large Load diff