mirror of
https://github.com/element-hq/synapse.git
synced 2024-11-22 09:35:45 +03:00
Implement a content type allow list for URL previews (#11936)
This implements an allow list for content types for which Synapse will attempt URL preview. If a URL resolves to a resource with a content type which isn't in the list, the download will terminate immediately. This makes sense given that Synapse would never successfully generate a URL preview for such files in the first place, and helps prevent issues with streaming media servers, such as #8302. Signed-off-by: Denis Kasak dkasak@termina.org.uk
This commit is contained in:
parent
06e5a76322
commit
337f38cac3
4 changed files with 99 additions and 0 deletions
1
changelog.d/11936.bugfix
Normal file
1
changelog.d/11936.bugfix
Normal file
|
@ -0,0 +1 @@
|
|||
Implement an allow list of content types for which we will attempt to preview a URL. This prevents Synapse from making useless longer-lived connections to streaming media servers.
|
|
@ -20,6 +20,7 @@ from typing import (
|
|||
TYPE_CHECKING,
|
||||
Any,
|
||||
BinaryIO,
|
||||
Callable,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
|
@ -693,12 +694,18 @@ class SimpleHttpClient:
|
|||
output_stream: BinaryIO,
|
||||
max_size: Optional[int] = None,
|
||||
headers: Optional[RawHeaders] = None,
|
||||
is_allowed_content_type: Optional[Callable[[str], bool]] = None,
|
||||
) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
|
||||
"""GETs a file from a given URL
|
||||
Args:
|
||||
url: The URL to GET
|
||||
output_stream: File to write the response body to.
|
||||
headers: A map from header name to a list of values for that header
|
||||
is_allowed_content_type: A predicate to determine whether the
|
||||
content type of the file we're downloading is allowed. If set and
|
||||
it evaluates to False when called with the content type, the
|
||||
request will be terminated before completing the download by
|
||||
raising SynapseError.
|
||||
Returns:
|
||||
A tuple of the file length, dict of the response
|
||||
headers, absolute URI of the response and HTTP response code.
|
||||
|
@ -726,6 +733,17 @@ class SimpleHttpClient:
|
|||
HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
|
||||
)
|
||||
|
||||
if is_allowed_content_type and b"Content-Type" in resp_headers:
|
||||
content_type = resp_headers[b"Content-Type"][0].decode("ascii")
|
||||
if not is_allowed_content_type(content_type):
|
||||
raise SynapseError(
|
||||
HTTPStatus.BAD_GATEWAY,
|
||||
(
|
||||
"Requested file's content type not allowed for this operation: %s"
|
||||
% content_type
|
||||
),
|
||||
)
|
||||
|
||||
# TODO: if our Content-Type is HTML or something, just read the first
|
||||
# N bytes into RAM rather than saving it all to disk only to read it
|
||||
# straight back in again
|
||||
|
|
|
@ -403,6 +403,7 @@ class PreviewUrlResource(DirectServeJsonResource):
|
|||
output_stream=output_stream,
|
||||
max_size=self.max_spider_size,
|
||||
headers={"Accept-Language": self.url_preview_accept_language},
|
||||
is_allowed_content_type=_is_previewable,
|
||||
)
|
||||
except SynapseError:
|
||||
# Pass SynapseErrors through directly, so that the servlet
|
||||
|
@ -761,3 +762,10 @@ def _is_html(content_type: str) -> bool:
|
|||
|
||||
def _is_json(content_type: str) -> bool:
|
||||
return content_type.lower().startswith("application/json")
|
||||
|
||||
|
||||
def _is_previewable(content_type: str) -> bool:
|
||||
"""Returns True for content types for which we will perform URL preview and False
|
||||
otherwise."""
|
||||
|
||||
return _is_html(content_type) or _is_media(content_type) or _is_json(content_type)
|
||||
|
|
|
@ -243,6 +243,78 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
|||
self.assertEqual(channel.code, 200)
|
||||
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
|
||||
|
||||
def test_video_rejected(self):
|
||||
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||
|
||||
end_content = b"anything"
|
||||
|
||||
channel = self.make_request(
|
||||
"GET",
|
||||
"preview_url?url=http://matrix.org",
|
||||
shorthand=False,
|
||||
await_result=False,
|
||||
)
|
||||
self.pump()
|
||||
|
||||
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||
server = AccumulatingProtocol()
|
||||
server.makeConnection(FakeTransport(client, self.reactor))
|
||||
client.makeConnection(FakeTransport(server, self.reactor))
|
||||
client.dataReceived(
|
||||
(
|
||||
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||
b"Content-Type: video/mp4\r\n\r\n"
|
||||
)
|
||||
% (len(end_content))
|
||||
+ end_content
|
||||
)
|
||||
|
||||
self.pump()
|
||||
self.assertEqual(channel.code, 502)
|
||||
self.assertEqual(
|
||||
channel.json_body,
|
||||
{
|
||||
"errcode": "M_UNKNOWN",
|
||||
"error": "Requested file's content type not allowed for this operation: video/mp4",
|
||||
},
|
||||
)
|
||||
|
||||
def test_audio_rejected(self):
|
||||
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||
|
||||
end_content = b"anything"
|
||||
|
||||
channel = self.make_request(
|
||||
"GET",
|
||||
"preview_url?url=http://matrix.org",
|
||||
shorthand=False,
|
||||
await_result=False,
|
||||
)
|
||||
self.pump()
|
||||
|
||||
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||
server = AccumulatingProtocol()
|
||||
server.makeConnection(FakeTransport(client, self.reactor))
|
||||
client.makeConnection(FakeTransport(server, self.reactor))
|
||||
client.dataReceived(
|
||||
(
|
||||
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||
b"Content-Type: audio/aac\r\n\r\n"
|
||||
)
|
||||
% (len(end_content))
|
||||
+ end_content
|
||||
)
|
||||
|
||||
self.pump()
|
||||
self.assertEqual(channel.code, 502)
|
||||
self.assertEqual(
|
||||
channel.json_body,
|
||||
{
|
||||
"errcode": "M_UNKNOWN",
|
||||
"error": "Requested file's content type not allowed for this operation: audio/aac",
|
||||
},
|
||||
)
|
||||
|
||||
def test_non_ascii_preview_content_type(self):
|
||||
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||
|
||||
|
|
Loading…
Reference in a new issue