diff --git a/package.json b/package.json
index d361f79937..d82ec90c58 100644
--- a/package.json
+++ b/package.json
@@ -36,7 +36,7 @@
     "fuse.js": "^2.2.0",
     "glob": "^5.0.14",
     "highlight.js": "^8.9.1",
-    "linkifyjs": "2.0.0-beta.4",
+    "linkifyjs": "^2.1.1",
     "lodash": "^4.13.1",
     "marked": "^0.3.5",
     "matrix-js-sdk": "matrix-org/matrix-js-sdk#develop",
diff --git a/src/Markdown.js b/src/Markdown.js
new file mode 100644
index 0000000000..f7b97cf621
--- /dev/null
+++ b/src/Markdown.js
@@ -0,0 +1,150 @@
+/*
+Copyright 2016 OpenMarket Ltd
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+import marked from 'marked';
+
+// marked only applies the default options on the high
+// level marked() interface, so we do it here.
+const marked_options = Object.assign({}, marked.defaults, {
+    gfm: true,
+    tables: true,
+    breaks: true,
+    pedantic: false,
+    sanitize: true,
+    smartLists: true,
+    smartypants: false,
+    xhtml: true, // return self closing tags (ie. <br /> not <br>)
+});
+
+/**
+ * Class that wraps marked, adding the ability to see whether
+ * a given message actually uses any markdown syntax or whether
+ * it's plain text.
+ */
+export default class Markdown {
+    /**
+     * Tokenises the message once; both isPlainText() and toHTML()
+     * parse copies of the resulting token list.
+     *
+     * @param {string} input The message source text.
+     */
+    constructor(input) {
+        const lexer = new marked.Lexer(marked_options);
+        this.tokens = lexer.lex(input);
+    }
+
+    /**
+     * @return {Array} A shallow copy of the tokens (including the
+     *     'links' property) that can be handed to a marked.Parser.
+     */
+    _copyTokens() {
+        // copy tokens (the parser modifies its input arg)
+        const tokens_copy = this.tokens.slice();
+        // it also has a 'links' property, because this is javascript
+        // and why wouldn't you have an array that also has properties?
+        return Object.assign(tokens_copy, this.tokens);
+    }
+
+    /**
+     * @return {boolean} false if parsing the message invokes any
+     *     renderer function other than text, paragraph, or link
+     *     with text identical to its href; true otherwise.
+     */
+    isPlainText() {
+        // we determine if the message requires markdown by
+        // running the parser on the tokens with a dummy
+        // renderer and seeing if any of the renderer's
+        // functions are called other than those noted below.
+        // In case you were wondering, no we can't just examine
+        // the tokens because the tokens we have are only the
+        // output of the *first* tokenizer: any line-based
+        // markdown is processed by marked within Parser by
+        // the 'inline lexer'...
+        let is_plain = true;
+
+        function setNotPlain() {
+            is_plain = false;
+        }
+
+        const dummy_renderer = {};
+        for (const k of Object.keys(marked.Renderer.prototype)) {
+            dummy_renderer[k] = setNotPlain;
+        }
+        // text and paragraph are just text
+        dummy_renderer.text = function(t){return t;};
+        dummy_renderer.paragraph = function(t){return t;};
+
+        // ignore links where text is just the url:
+        // this ignores plain URLs that markdown has
+        // detected whilst preserving markdown syntax links
+        dummy_renderer.link = function(href, title, text) {
+            if (text !== href) {
+                is_plain = false;
+            }
+        };
+
+        const dummy_options = Object.assign({}, marked_options, {
+            renderer: dummy_renderer,
+        });
+        const dummy_parser = new marked.Parser(dummy_options);
+        dummy_parser.parse(this._copyTokens());
+
+        return is_plain;
+    }
+
+    /**
+     * @return {string} The message rendered to HTML. Links whose text
+     *     is identical to their href are emitted as the bare href, and
+     *     a message with a single top-level token is not wrapped in a
+     *     p tag.
+     */
+    toHTML() {
+        const real_renderer = new marked.Renderer();
+        real_renderer.link = function(href, title, text) {
+            // prevent marked from turning plain URLs
+            // into links, because its algorithm is fairly
+            // poor. Let's send plain URLs rather than
+            // badly linkified ones (the linkifier Vector
+            // uses on message display is way better, eg.
+            // handles URLs with closing parens at the end).
+            if (text === href) {
+                return href;
+            }
+            // everything else goes through marked's stock link renderer
+            return marked.Renderer.prototype.link.apply(this, arguments);
+        };
+
+        real_renderer.paragraph = (text) => {
+            // The tokens at the top level are the 'blocks', so if we
+            // have more than one, there are multiple 'paragraphs'.
+            // If there is only one top level token, just return the
+            // bare text: it's a single line of text and so should be
+            // 'inline', rather than necessarily wrapped in its own
+            // p tag. If, however, we have multiple tokens, each gets
+            // its own p tag to keep them as separate paragraphs.
+            if (this.tokens.length === 1) {
+                return text;
+            }
+            return '<p>' + text + '</p>';
+        };
+
+        const real_options = Object.assign({}, marked_options, {
+            renderer: real_renderer,
+        });
+        const real_parser = new marked.Parser(real_options);
+        return real_parser.parse(this._copyTokens());
+    }
+}
diff --git a/src/components/views/rooms/MessageComposerInputOld.js b/src/components/views/rooms/MessageComposerInputOld.js
index 20b57fb246..28e3186c50 100644
--- a/src/components/views/rooms/MessageComposerInputOld.js
+++ b/src/components/views/rooms/MessageComposerInputOld.js
@@ -15,18 +15,6 @@
 */
 var React = require("react");
 
-var marked = require("marked");
-marked.setOptions({
-    renderer: new marked.Renderer(),
-    gfm: true,
-    tables: true,
-    breaks: true,
-    pedantic: false,
-    sanitize: true,
-    smartLists: true,
-    smartypants: false
-});
-
 var MatrixClientPeg = require("../../../MatrixClientPeg");
 var SlashCommands = require("../../../SlashCommands");
 var Modal = require("../../../Modal");
@@ -35,24 +23,12 @@ var sdk = require('../../../index');
 
 var dis = require("../../../dispatcher");
 var KeyCode = require("../../../KeyCode");
+var Markdown = require("../../../Markdown");
 
 var TYPING_USER_TIMEOUT = 10000;
 var TYPING_SERVER_TIMEOUT = 30000;
 var MARKDOWN_ENABLED = true;
 
-function mdownToHtml(mdown) {
-    var html = marked(mdown) || "";
-    html = html.trim();
-    // strip start and end <p> tags else you get 'orrible spacing
-    if (html.indexOf("<p>") === 0) {
-        html = html.substring("<p>".length);
-    }
-    if (html.lastIndexOf("</p>") === (html.length - "</p>".length)) {
-        html = html.substring(0, html.length - "</p>".length);
-    }
-    return html;
-}
-
 /*
  * The textInput part of the MessageComposer
  */
@@ -341,8 +317,15 @@ module.exports = React.createClass({
                 contentText = contentText.substring(1);
             }
 
-            var htmlText;
-            if (this.markdownEnabled && (htmlText = mdownToHtml(contentText)) !== contentText) {
+            let send_markdown = false;
+            let mdown;
+            if (this.markdownEnabled) {
+                mdown = new Markdown(contentText);
+                send_markdown = !mdown.isPlainText();
+            }
+
+            if (send_markdown) {
+                const htmlText = mdown.toHTML();
                 sendMessagePromise = isEmote ?
                     MatrixClientPeg.get().sendHtmlEmote(this.props.room.roomId, contentText, htmlText) :
                     MatrixClientPeg.get().sendHtmlMessage(this.props.room.roomId, contentText, htmlText);
diff --git a/src/linkify-matrix.js b/src/linkify-matrix.js
index 99e4898182..8dceb3a527 100644
--- a/src/linkify-matrix.js
+++ b/src/linkify-matrix.js
@@ -42,7 +42,13 @@ function matrixLinkify(linkify) {
         TT.PLUS,
         TT.NUM,
         TT.DOMAIN,
-        TT.TLD
+        TT.TLD,
+        TT.UNDERSCORE,
+        TT.POUND,
+
+        // because 'localhost' is tokenised to the localhost token,
+        // usernames @localhost:foo.com are otherwise not matched!
+        TT.LOCALHOST,
     ];
 
     S_START.on(TT.POUND, S_HASH);
@@ -54,6 +60,7 @@ function matrixLinkify(linkify) {
     S_HASH_NAME.on(TT.COLON, S_HASH_NAME_COLON);
 
     S_HASH_NAME_COLON.on(TT.DOMAIN, S_HASH_NAME_COLON_DOMAIN);
+    S_HASH_NAME_COLON.on(TT.LOCALHOST, S_ROOMALIAS); // accept #foo:localhost
     S_HASH_NAME_COLON_DOMAIN.on(TT.DOT, S_HASH_NAME_COLON_DOMAIN_DOT);
     S_HASH_NAME_COLON_DOMAIN_DOT.on(TT.DOMAIN, S_HASH_NAME_COLON_DOMAIN);
     S_HASH_NAME_COLON_DOMAIN_DOT.on(TT.TLD, S_ROOMALIAS);
@@ -75,10 +82,14 @@ function matrixLinkify(linkify) {
 
     var username_tokens = [
         TT.DOT,
+        TT.UNDERSCORE,
         TT.PLUS,
         TT.NUM,
         TT.DOMAIN,
-        TT.TLD
+        TT.TLD,
+
+        // as in roomname_tokens
+        TT.LOCALHOST,
     ];
 
     S_START.on(TT.AT, S_AT);
@@ -90,6 +101,7 @@ function matrixLinkify(linkify) {
     S_AT_NAME.on(TT.COLON, S_AT_NAME_COLON);
 
     S_AT_NAME_COLON.on(TT.DOMAIN, S_AT_NAME_COLON_DOMAIN);
+    S_AT_NAME_COLON.on(TT.LOCALHOST, S_USERID); // accept @foo:localhost
     S_AT_NAME_COLON_DOMAIN.on(TT.DOT, S_AT_NAME_COLON_DOMAIN_DOT);
     S_AT_NAME_COLON_DOMAIN_DOT.on(TT.DOMAIN, S_AT_NAME_COLON_DOMAIN);
     S_AT_NAME_COLON_DOMAIN_DOT.on(TT.TLD, S_USERID);