mirror of
https://github.com/element-hq/element-web
synced 2024-11-27 19:56:47 +03:00
Fix spurious html tags like <shrug>
* Only render HTML tags in markdown if they're del tags * Consider non-allowed HTML tags as plain text nodes, so a message of just '<shrug>' doesn't need to be sent as HTML * Consequently rewrite isPlaintext to just look at the parse tree rather than making and gutting a renderer to walk the tree (now we're using a library that actually produces a meaningfgul parse tree). * Tweak when we put \n on text output to avoid putting \n on the end of messages. Fixes https://github.com/vector-im/riot-web/issues/3065
This commit is contained in:
parent
63e47d8677
commit
853c89dfdc
1 changed files with 78 additions and 39 deletions
117
src/Markdown.js
117
src/Markdown.js
|
@ -15,6 +15,45 @@ limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import commonmark from 'commonmark';
|
import commonmark from 'commonmark';
|
||||||
|
import escape from 'lodash/escape';
|
||||||
|
|
||||||
|
const ALLOWED_HTML_TAGS = ['del'];
|
||||||
|
|
||||||
|
// These types of node are definitely text
|
||||||
|
const TEXT_NODES = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
|
||||||
|
|
||||||
|
function is_allowed_html_tag(node) {
|
||||||
|
// Regex won't work for tags with attrs, but we only
|
||||||
|
// allow <del> anyway.
|
||||||
|
const matches = /^<\/?(.*)>$/.exec(node.literal);
|
||||||
|
if (matches && matches.length == 2) {
|
||||||
|
const tag = matches[1];
|
||||||
|
return ALLOWED_HTML_TAGS.indexOf(tag) > -1;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
function html_if_tag_allowed(node) {
|
||||||
|
if (is_allowed_html_tag(node)) {
|
||||||
|
this.lit(node.literal);
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
this.lit(escape(node.literal));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns true if the parse output containing the node
|
||||||
|
* comprises multiple block level elements (ie. lines),
|
||||||
|
* or false if it is only a single line.
|
||||||
|
*/
|
||||||
|
function is_multi_line(node) {
|
||||||
|
var par = node;
|
||||||
|
while (par.parent) {
|
||||||
|
par = par.parent;
|
||||||
|
}
|
||||||
|
return par.firstChild != par.lastChild;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class that wraps commonmark, adding the ability to see whether
|
* Class that wraps commonmark, adding the ability to see whether
|
||||||
|
@ -30,29 +69,26 @@ export default class Markdown {
|
||||||
}
|
}
|
||||||
|
|
||||||
isPlainText() {
|
isPlainText() {
|
||||||
// we determine if the message requires markdown by
|
const walker = this.parsed.walker();
|
||||||
// running the parser on the tokens with a dummy
|
|
||||||
// rendered and seeing if any of the renderer's
|
|
||||||
// functions are called other than those noted below.
|
|
||||||
// TODO: can't we just examine the output of the parser?
|
|
||||||
let is_plain = true;
|
|
||||||
|
|
||||||
function setNotPlain() {
|
let ev;
|
||||||
is_plain = false;
|
while ( (ev = walker.next()) ) {
|
||||||
|
const node = ev.node;
|
||||||
|
if (TEXT_NODES.indexOf(node.type) > -1) {
|
||||||
|
// definitely text
|
||||||
|
continue;
|
||||||
|
} else if (node.type == 'html_inline' || node.type == 'html_block') {
|
||||||
|
// if it's an allowed html tag, we need to render it and therefore
|
||||||
|
// we will need to use HTML. If it's not allowed, it's not HTML since
|
||||||
|
// we'll just be treating it as text.
|
||||||
|
if (is_allowed_html_tag(node)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
const dummy_renderer = new commonmark.HtmlRenderer();
|
|
||||||
for (const k of Object.keys(commonmark.HtmlRenderer.prototype)) {
|
|
||||||
dummy_renderer[k] = setNotPlain;
|
|
||||||
}
|
|
||||||
// text and paragraph are just text
|
|
||||||
dummy_renderer.text = function(t) { return t; };
|
|
||||||
dummy_renderer.softbreak = function(t) { return t; };
|
|
||||||
dummy_renderer.paragraph = function(t) { return t; };
|
|
||||||
|
|
||||||
dummy_renderer.render(this.parsed);
|
|
||||||
|
|
||||||
return is_plain;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
toHTML() {
|
toHTML() {
|
||||||
|
@ -65,20 +101,27 @@ export default class Markdown {
|
||||||
// 'inline', rather than unnecessarily wrapped in its own
|
// 'inline', rather than unnecessarily wrapped in its own
|
||||||
// p tag. If, however, we have multiple nodes, each gets
|
// p tag. If, however, we have multiple nodes, each gets
|
||||||
// its own p tag to keep them as separate paragraphs.
|
// its own p tag to keep them as separate paragraphs.
|
||||||
var par = node;
|
if (is_multi_line(node)) {
|
||||||
while (par.parent) {
|
|
||||||
par = par.parent;
|
|
||||||
}
|
|
||||||
if (par.firstChild != par.lastChild) {
|
|
||||||
real_paragraph.call(this, node, entering);
|
real_paragraph.call(this, node, entering);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
renderer.html_inline = html_if_tag_allowed;
|
||||||
|
renderer.html_block = function(node) {
|
||||||
|
// as with `paragraph`, we only insert line breaks
|
||||||
|
// if there are multiple lines in the markdown.
|
||||||
|
const isMultiLine = is_multi_line(node);
|
||||||
|
|
||||||
|
if (isMultiLine) this.cr();
|
||||||
|
html_if_tag_allowed.call(this, node);
|
||||||
|
if (isMultiLine) this.cr();
|
||||||
|
}
|
||||||
|
|
||||||
return renderer.render(this.parsed);
|
return renderer.render(this.parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Render the mrkdown message to plain text. That is, essentially
|
* Render the markdown message to plain text. That is, essentially
|
||||||
* just remove any backslashes escaping what would otherwise be
|
* just remove any backslashes escaping what would otherwise be
|
||||||
* markdown syntax
|
* markdown syntax
|
||||||
* (to fix https://github.com/vector-im/riot-web/issues/2870)
|
* (to fix https://github.com/vector-im/riot-web/issues/2870)
|
||||||
|
@ -96,22 +139,18 @@ export default class Markdown {
|
||||||
};
|
};
|
||||||
|
|
||||||
renderer.paragraph = function(node, entering) {
|
renderer.paragraph = function(node, entering) {
|
||||||
// If there is only one top level node, just return the
|
// as with toHTML, only append lines to paragraphs if there are
|
||||||
// bare text: it's a single line of text and so should be
|
// multiple paragraphs
|
||||||
// 'inline', rather than unnecessarily wrapped in its own
|
if (is_multi_line(node)) {
|
||||||
// p tag. If, however, we have multiple nodes, each gets
|
if (!entering && node.next) {
|
||||||
// its own p tag to keep them as separate paragraphs.
|
|
||||||
var par = node;
|
|
||||||
while (par.parent) {
|
|
||||||
node = par;
|
|
||||||
par = par.parent;
|
|
||||||
}
|
|
||||||
if (node != par.lastChild) {
|
|
||||||
if (!entering) {
|
|
||||||
this.lit('\n\n');
|
this.lit('\n\n');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
renderer.html_block = function(node) {
|
||||||
|
this.lit(node.literal);
|
||||||
|
if (is_multi_line(node) && node.next) this.lit('\n\n');
|
||||||
|
}
|
||||||
|
|
||||||
return renderer.render(this.parsed);
|
return renderer.render(this.parsed);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue