Replace marked with commonmark

Marked has some annoying bugs, and the author is inactive, so replace it
with commonmark.js, which is the reference JavaScript implementation of
CommonMark.  CommonMark is also preferable since it has a specification
and a conformance test suite to make sure that parsers are correct.

Signed-off-by: Johannes Löthberg <johannes@kyriasis.com>
This commit is contained in:
Johannes Löthberg 2016-11-29 20:56:48 +01:00
parent 03bd4b1457
commit 4d2926485b
2 changed files with 24 additions and 70 deletions

View file

@ -58,7 +58,7 @@
"isomorphic-fetch": "^2.2.1", "isomorphic-fetch": "^2.2.1",
"linkifyjs": "^2.1.3", "linkifyjs": "^2.1.3",
"lodash": "^4.13.1", "lodash": "^4.13.1",
"marked": "^0.3.5", "commonmark": "^0.27.0",
"matrix-js-sdk": "matrix-org/matrix-js-sdk#develop", "matrix-js-sdk": "matrix-org/matrix-js-sdk#develop",
"optimist": "^0.6.1", "optimist": "^0.6.1",
"q": "^1.4.1", "q": "^1.4.1",

View file

@ -14,20 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
*/ */
import marked from 'marked'; import commonmark from 'commonmark';
// marked only applies the default options on the high
// level marked() interface, so we do it here.
const marked_options = Object.assign({}, marked.defaults, {
gfm: true,
tables: true,
breaks: true,
pedantic: false,
sanitize: true,
smartLists: true,
smartypants: false,
xhtml: true, // return self closing tags (ie. <br /> not <br>)
});
/** /**
* Class that wraps marked, adding the ability to see whether * Class that wraps marked, adding the ability to see whether
@ -36,16 +23,7 @@ const marked_options = Object.assign({}, marked.defaults, {
*/ */
export default class Markdown { export default class Markdown {
constructor(input) { constructor(input) {
const lexer = new marked.Lexer(marked_options); this.input = input
this.tokens = lexer.lex(input);
}
_copyTokens() {
// copy tokens (the parser modifies its input arg)
const tokens_copy = this.tokens.slice();
// it also has a 'links' property, because this is javascript
// and why wouldn't you have an array that also has properties?
return Object.assign(tokens_copy, this.tokens);
} }
isPlainText() { isPlainText() {
@ -64,65 +42,41 @@ export default class Markdown {
is_plain = false; is_plain = false;
} }
const dummy_renderer = {}; const dummy_renderer = new commonmark.HtmlRenderer();
for (const k of Object.keys(marked.Renderer.prototype)) { for (const k of Object.keys(commonmark.HtmlRenderer.prototype)) {
dummy_renderer[k] = setNotPlain; dummy_renderer[k] = setNotPlain;
} }
// text and paragraph are just text // text and paragraph are just text
dummy_renderer.text = function(t) { return t; } dummy_renderer.text = function(t) { return t; }
dummy_renderer.paragraph = function(t) { return t; } dummy_renderer.paragraph = function(t) { return t; }
// ignore links where text is just the url: const dummy_parser = new commonmark.Parser();
// this ignores plain URLs that markdown has dummy_renderer.render(dummy_parser.parse(this.input));
// detected whilst preserving markdown syntax links
dummy_renderer.link = function(href, title, text) {
if (text != href) {
is_plain = false;
}
}
const dummy_options = Object.assign({}, marked_options, {
renderer: dummy_renderer,
});
const dummy_parser = new marked.Parser(dummy_options);
dummy_parser.parse(this._copyTokens());
return is_plain; return is_plain;
} }
toHTML() { toHTML() {
const real_renderer = new marked.Renderer(); const parser = new commonmark.Parser();
real_renderer.link = function(href, title, text) {
// prevent marked from turning plain URLs
// into links, because its algorithm is fairly
// poor. Let's send plain URLs rather than
// badly linkified ones (the linkifier Vector
// uses on message display is way better, eg.
// handles URLs with closing parens at the end).
if (text == href) {
return href;
}
return marked.Renderer.prototype.link.apply(this, arguments);
}
real_renderer.paragraph = (text) => { const renderer = new commonmark.HtmlRenderer({safe: true});
// The tokens at the top level are the 'blocks', so if we const real_paragraph = renderer.paragraph;
// have more than one, there are multiple 'paragraphs'. renderer.paragraph = function(node, entering) {
// If there is only one top level token, just return the // If there is only one top level node, just return the
// bare text: it's a single line of text and so should be // bare text: it's a single line of text and so should be
// 'inline', rather than necessarily wrapped in its own // 'inline', rather than unnecessarily wrapped in its own
// p tag. If, however, we have multiple tokens, each gets // p tag. If, however, we have multiple nodes, each gets
// its own p tag to keep them as separate paragraphs. // its own p tag to keep them as separate paragraphs.
if (this.tokens.length == 1) { var par = node;
return text; while (par.parent) {
par = par.parent
}
if (par.firstChild != par.lastChild) {
real_paragraph.bind(this)(node, entering);
} }
return '<p>' + text + '</p>';
} }
const real_options = Object.assign({}, marked_options, { var parsed = parser.parse(this.input);
renderer: real_renderer, return renderer.render(parsed);
});
const real_parser = new marked.Parser(real_options);
return real_parser.parse(this._copyTokens());
} }
} }