From 2db53c228493311ad2fd044936af6f930f570270 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 19 Feb 2017 03:04:42 +0200 Subject: [PATCH 1/5] whitelist data & mxc URIs on img tags: readds PR #333 now that punkave/sanitize-html#137 has landed --- package.json | 2 +- src/HtmlUtils.js | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index a07e2236aa..9b260e341a 100644 --- a/package.json +++ b/package.json @@ -68,7 +68,7 @@ "react-addons-css-transition-group": "15.3.2", "react-dom": "^15.4.0", "react-gemini-scrollbar": "matrix-org/react-gemini-scrollbar#5e97aef", - "sanitize-html": "^1.11.1", + "sanitize-html": "^1.14.1", "text-encoding-utf-8": "^1.0.1", "velocity-vector": "vector-im/velocity#059e3b2", "whatwg-fetch": "^1.0.0" diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index b9d0ce67e8..8ae2c0a4a8 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -87,7 +87,7 @@ var sanitizeHtmlParams = { // deliberately no h1/h2 to stop people shouting. 'h3', 'h4', 'h5', 'h6', 'blockquote', 'p', 'a', 'ul', 'ol', 'nl', 'li', 'b', 'i', 'u', 'strong', 'em', 'strike', 'code', 'hr', 'br', 'div', - 'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre' + 'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'pre', 'img', ], allowedAttributes: { // custom ones first: @@ -102,10 +102,10 @@ var sanitizeHtmlParams = { // URL schemes we permit allowedSchemes: ['http', 'https', 'ftp', 'mailto'], - // DO NOT USE. sanitize-html allows all URL starting with '//' - // so this will always allow links to whatever scheme the - // host page is served over. - allowedSchemesByTag: {}, + allowedSchemesByTag: { + img: [ 'data', 'mxc' ], + }, + allowProtocolRelative: false, transformTags: { // custom to matrix // add blank targets to all hyperlinks except vector URLs From 96f5f92c7f8510bc28208bc6a273da22202fad6a Mon Sep 17 00:00:00 2001 From: Luke Barnard Date: Mon, 10 Jul 2017 15:44:41 +0100 Subject: [PATCH 2/5] Disallow data attribute, we don't need it currently --- src/HtmlUtils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index ea72b92eaf..e291632eca 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -153,7 +153,7 @@ const sanitizeHtmlParams = { allowedSchemes: ['http', 'https', 'ftp', 'mailto'], allowedSchemesByTag: { - img: [ 'data', 'mxc' ], + img: ['mxc'], }, allowProtocolRelative: false, From bb9080425a5490fcf0bf26e741727b811485e686 Mon Sep 17 00:00:00 2001 From: Luke Barnard Date: Mon, 10 Jul 2017 16:27:23 +0100 Subject: [PATCH 3/5] Allow image tags with src attributes with schemes http[s] And transform `mxc:*` URLs to `https?://` --- src/HtmlUtils.js | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index e291632eca..95e698d6e5 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -23,6 +23,7 @@ var linkifyMatrix = require('./linkify-matrix'); import escape from 'lodash/escape'; import emojione from 'emojione'; import classNames from 'classnames'; +import MatrixClientPeg from './MatrixClientPeg'; emojione.imagePathSVG = 'emojione/svg/'; // Store PNG path for displaying many flags at once (for increased performance over SVG) @@ -141,8 +142,6 @@ const sanitizeHtmlParams = { font: ['color', 'data-mx-bg-color', 'data-mx-color', 'style'], // custom to matrix span: ['data-mx-bg-color', 'data-mx-color', 'style'], // custom to matrix a: ['href', 'name', 'target', 'rel'], // remote target: custom to matrix - // We don't currently allow img itself by default, but this - // would make sense if we did img: ['src'], ol: ['start'], code: ['class'], // We don't actually allow all classes, we filter them in transformTags @@ -153,7 +152,7 @@ const sanitizeHtmlParams = { allowedSchemes: ['http', 'https', 'ftp', 'mailto'], allowedSchemesByTag: { - img: ['mxc'], + img: ['http', 'https'], }, allowProtocolRelative: false, @@ -187,6 +186,16 @@ const sanitizeHtmlParams = { attribs.rel = 'noopener'; // https://mathiasbynens.github.io/rel-noopener/ return { tagName: tagName, attribs : attribs }; }, + 'img': function(tagName, attribs) { + if (attribs.src.startsWith('mxc://')) { + attribs.src = MatrixClientPeg.get().mxcUrlToHttp( + attribs.src, + attribs.width || 800, + attribs.height || 600, + ); + } + return { tagName: tagName, attribs: attribs }; + }, 'code': function(tagName, attribs) { if (typeof attribs.class !== 'undefined') { // Filter out all classes other than ones starting with language- for syntax highlighting. From 6877b9943539b4e0900ad65d4ae77a861d3d8b97 Mon Sep 17 00:00:00 2001 From: Luke Barnard Date: Mon, 10 Jul 2017 17:44:49 +0100 Subject: [PATCH 4/5] Strip ``s when transforming `img`s instead of using `allowedSchemesByTag` --- src/HtmlUtils.js | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index 95e698d6e5..1036fbf663 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -151,9 +151,6 @@ const sanitizeHtmlParams = { // URL schemes we permit allowedSchemes: ['http', 'https', 'ftp', 'mailto'], - allowedSchemesByTag: { - img: ['http', 'https'], - }, allowProtocolRelative: false, transformTags: { // custom to matrix @@ -187,13 +184,14 @@ const sanitizeHtmlParams = { return { tagName: tagName, attribs : attribs }; }, 'img': function(tagName, attribs) { - if (attribs.src.startsWith('mxc://')) { - attribs.src = MatrixClientPeg.get().mxcUrlToHttp( - attribs.src, - attribs.width || 800, - attribs.height || 600, - ); + if (!attribs.src.startsWith('mxc://')) { + return { tagName, attribs: {}}; } + attribs.src = MatrixClientPeg.get().mxcUrlToHttp( + attribs.src, + attribs.width || 800, + attribs.height || 600, + ); return { tagName: tagName, attribs: attribs }; }, 'code': function(tagName, attribs) { From dfa97e84523089b0a39de0f769282f756f99f8c1 Mon Sep 17 00:00:00 2001 From: Luke Barnard Date: Mon, 10 Jul 2017 17:48:01 +0100 Subject: [PATCH 5/5] Add comment --- src/HtmlUtils.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/HtmlUtils.js b/src/HtmlUtils.js index 1036fbf663..9041e88594 100644 --- a/src/HtmlUtils.js +++ b/src/HtmlUtils.js @@ -184,6 +184,9 @@ const sanitizeHtmlParams = { return { tagName: tagName, attribs : attribs }; }, 'img': function(tagName, attribs) { + // Strip out imgs that aren't `mxc` here instead of using allowedSchemesByTag + // because transformTags is used _before_ we filter by allowedSchemesByTag and + // we don't want to allow images with `https?` `src`s. if (!attribs.src.startsWith('mxc://')) { return { tagName, attribs: {}}; }