mirror of
https://github.com/element-hq/element-web
synced 2024-11-25 10:45:51 +03:00
Don't consider textual characters to be emoji (#12582)
* Don't consider textual characters to be emoji

  We were using emojibase-regex to match emoji within messages. However, the docs (https://emojibase.dev/docs/regex/) state that this regex matches both emoji and text presentation characters. This is not what we want, and will result in false positives for characters like '↔' that could turn into an emoji if paired with a variation selector.

  Unfortunately, none of the other regexes provided by Emojibase do what we want either (https://github.com/milesj/emojibase/issues/174). In the meantime, browser support for the RGI_Emoji character sequence class has made it feasible to write an emoji regex by hand, so that's what I've done.

* Add a fallback for BIGEMOJI_REGEX as well
This commit is contained in:
parent
489bc32674
commit
c61eca8c24
6 changed files with 98 additions and 12 deletions
10
.eslintrc.js
10
.eslintrc.js
|
@ -78,6 +78,11 @@ module.exports = {
|
||||||
name: "matrix-react-sdk/",
|
name: "matrix-react-sdk/",
|
||||||
message: "Please use matrix-react-sdk/src/index instead",
|
message: "Please use matrix-react-sdk/src/index instead",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "emojibase-regex",
|
||||||
|
message:
|
||||||
|
"This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
patterns: [
|
patterns: [
|
||||||
{
|
{
|
||||||
|
@ -141,6 +146,11 @@ module.exports = {
|
||||||
],
|
],
|
||||||
message: "Please use matrix-js-sdk/src/matrix instead",
|
message: "Please use matrix-js-sdk/src/matrix instead",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
group: ["emojibase-regex/emoji*"],
|
||||||
|
message:
|
||||||
|
"This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
|
@ -20,7 +20,6 @@ limitations under the License.
|
||||||
import React, { LegacyRef, ReactNode } from "react";
|
import React, { LegacyRef, ReactNode } from "react";
|
||||||
import sanitizeHtml from "sanitize-html";
|
import sanitizeHtml from "sanitize-html";
|
||||||
import classNames from "classnames";
|
import classNames from "classnames";
|
||||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
|
||||||
import katex from "katex";
|
import katex from "katex";
|
||||||
import { decode } from "html-entities";
|
import { decode } from "html-entities";
|
||||||
import { IContent } from "matrix-js-sdk/src/matrix";
|
import { IContent } from "matrix-js-sdk/src/matrix";
|
||||||
|
@ -46,10 +45,35 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
|
||||||
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;
|
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;
|
||||||
|
|
||||||
// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
|
// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
|
||||||
// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace)
|
// (Zero-Width Space, other whitespace)
|
||||||
const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g;
|
const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;
|
||||||
|
|
||||||
const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, "i");
|
// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional
|
||||||
|
// emoji presentation VS (U+FE0F), but not those sequences that are followed by
|
||||||
|
// a text presentation VS (U+FE0E). We also count lone regional indicators
|
||||||
|
// (U+1F1E6-U+1F1FF). Technically this regex produces false negatives for emoji
|
||||||
|
// followed by U+FE0E when the emoji doesn't have a text variant, but in
|
||||||
|
// practice this doesn't matter.
|
||||||
|
export const EMOJI_REGEX = (() => {
|
||||||
|
try {
|
||||||
|
// Per our support policy, v mode is available to us, but we still don't
|
||||||
|
// want the app to completely crash on older platforms. We use the
|
||||||
|
// constructor here to avoid a syntax error on such platforms.
|
||||||
|
return new RegExp("\\p{RGI_Emoji}(?!\\uFE0E)(?:(?<!\\uFE0F)\\uFE0F)?|[\\u{1f1e6}-\\u{1f1ff}]", "v");
|
||||||
|
} catch (_e) {
|
||||||
|
// v mode not supported; fall back to matching nothing
|
||||||
|
return /(?!)/;
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
|
||||||
|
const BIGEMOJI_REGEX = (() => {
|
||||||
|
try {
|
||||||
|
return new RegExp(`^(${EMOJI_REGEX.source})+$`, "iv");
|
||||||
|
} catch (_e) {
|
||||||
|
// Fall back, just like for EMOJI_REGEX
|
||||||
|
return /(?!)/;
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return true if the given string contains emoji
|
* Return true if the given string contains emoji
|
||||||
|
@ -266,7 +290,7 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
|
||||||
let key = 0;
|
let key = 0;
|
||||||
|
|
||||||
for (const data of graphemeSegmenter.segment(message)) {
|
for (const data of graphemeSegmenter.segment(message)) {
|
||||||
if (EMOJIBASE_REGEX.test(data.segment)) {
|
if (EMOJI_REGEX.test(data.segment)) {
|
||||||
if (text) {
|
if (text) {
|
||||||
result.push(text);
|
result.push(text);
|
||||||
text = "";
|
text = "";
|
||||||
|
|
|
@ -15,7 +15,6 @@ limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import React, { createRef, KeyboardEvent, SyntheticEvent } from "react";
|
import React, { createRef, KeyboardEvent, SyntheticEvent } from "react";
|
||||||
import EMOJI_REGEX from "emojibase-regex";
|
|
||||||
import {
|
import {
|
||||||
IContent,
|
IContent,
|
||||||
MatrixEvent,
|
MatrixEvent,
|
||||||
|
@ -70,6 +69,7 @@ import { doMaybeLocalRoomAction } from "../../../utils/local-room";
|
||||||
import { Caret } from "../../../editor/caret";
|
import { Caret } from "../../../editor/caret";
|
||||||
import { IDiff } from "../../../editor/diff";
|
import { IDiff } from "../../../editor/diff";
|
||||||
import { getBlobSafeMimeType } from "../../../utils/blobs";
|
import { getBlobSafeMimeType } from "../../../utils/blobs";
|
||||||
|
import { EMOJI_REGEX } from "../../../HtmlUtils";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the mentions information based on the editor model (and any related events):
|
* Build the mentions information based on the editor model (and any related events):
|
||||||
|
|
|
@ -15,11 +15,10 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
|
||||||
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
|
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
|
||||||
|
|
||||||
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
|
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
|
||||||
import { unicodeToShortcode } from "../HtmlUtils";
|
import { EMOJI_REGEX, unicodeToShortcode } from "../HtmlUtils";
|
||||||
import * as Avatar from "../Avatar";
|
import * as Avatar from "../Avatar";
|
||||||
import defaultDispatcher from "../dispatcher/dispatcher";
|
import defaultDispatcher from "../dispatcher/dispatcher";
|
||||||
import { Action } from "../dispatcher/actions";
|
import { Action } from "../dispatcher/actions";
|
||||||
|
@ -197,7 +196,7 @@ abstract class BasePart {
|
||||||
|
|
||||||
abstract class PlainBasePart extends BasePart {
|
abstract class PlainBasePart extends BasePart {
|
||||||
protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean {
|
protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean {
|
||||||
if (chr === "\n" || EMOJIBASE_REGEX.test(chr)) {
|
if (chr === "\n" || EMOJI_REGEX.test(chr)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// when not pasting or dropping text, reject characters that should start a pill candidate
|
// when not pasting or dropping text, reject characters that should start a pill candidate
|
||||||
|
@ -375,7 +374,7 @@ class NewlinePart extends BasePart implements IBasePart {
|
||||||
|
|
||||||
export class EmojiPart extends BasePart implements IBasePart {
|
export class EmojiPart extends BasePart implements IBasePart {
|
||||||
protected acceptsInsertion(chr: string, offset: number): boolean {
|
protected acceptsInsertion(chr: string, offset: number): boolean {
|
||||||
return EMOJIBASE_REGEX.test(chr);
|
return EMOJI_REGEX.test(chr);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected acceptsRemoval(position: number, chr: string): boolean {
|
protected acceptsRemoval(position: number, chr: string): boolean {
|
||||||
|
@ -573,7 +572,7 @@ export class PartCreator {
|
||||||
case "\n":
|
case "\n":
|
||||||
return new NewlinePart();
|
return new NewlinePart();
|
||||||
default:
|
default:
|
||||||
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
|
if (EMOJI_REGEX.test(getFirstGrapheme(input))) {
|
||||||
return new EmojiPart();
|
return new EmojiPart();
|
||||||
}
|
}
|
||||||
return new PlainPart();
|
return new PlainPart();
|
||||||
|
@ -650,7 +649,7 @@ export class PartCreator {
|
||||||
let plainText = "";
|
let plainText = "";
|
||||||
|
|
||||||
for (const data of graphemeSegmenter.segment(text)) {
|
for (const data of graphemeSegmenter.segment(text)) {
|
||||||
if (EMOJIBASE_REGEX.test(data.segment)) {
|
if (EMOJI_REGEX.test(data.segment)) {
|
||||||
if (plainText) {
|
if (plainText) {
|
||||||
parts.push(this.plain(plainText));
|
parts.push(this.plain(plainText));
|
||||||
plainText = "";
|
plainText = "";
|
||||||
|
|
|
@ -107,6 +107,12 @@ describe("bodyToHtml", () => {
|
||||||
expect(html).toMatchInlineSnapshot(`"<span class="mx_EventTile_searchHighlight">test</span> foo <b>bar"`);
|
expect(html).toMatchInlineSnapshot(`"<span class="mx_EventTile_searchHighlight">test</span> foo <b>bar"`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("generates big emoji for emoji made of multiple characters", () => {
|
||||||
|
const { asFragment } = render(bodyToHtml({ body: "👨👩👧👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement);
|
||||||
|
|
||||||
|
expect(asFragment()).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
|
||||||
it("should generate big emoji for an emoji-only reply to a message", () => {
|
it("should generate big emoji for an emoji-only reply to a message", () => {
|
||||||
const { asFragment } = render(
|
const { asFragment } = render(
|
||||||
bodyToHtml(
|
bodyToHtml(
|
||||||
|
@ -132,6 +138,12 @@ describe("bodyToHtml", () => {
|
||||||
expect(asFragment()).toMatchSnapshot();
|
expect(asFragment()).toMatchSnapshot();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("does not mistake characters in text presentation mode for emoji", () => {
|
||||||
|
const { asFragment } = render(bodyToHtml({ body: "↔ ❗︎", msgtype: "m.text" }, [], {}) as ReactElement);
|
||||||
|
|
||||||
|
expect(asFragment()).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
|
||||||
describe("feature_latex_maths", () => {
|
describe("feature_latex_maths", () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
|
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
|
||||||
|
|
|
@ -1,5 +1,16 @@
|
||||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||||
|
|
||||||
|
exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = `
|
||||||
|
<DocumentFragment>
|
||||||
|
<span
|
||||||
|
class="mx_EventTile_body"
|
||||||
|
dir="auto"
|
||||||
|
>
|
||||||
|
↔ ❗︎
|
||||||
|
</span>
|
||||||
|
</DocumentFragment>
|
||||||
|
`;
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
|
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"<p>hello</p><div>world</div>"`;
|
exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"<p>hello</p><div>world</div>"`;
|
||||||
|
@ -8,6 +19,36 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hel
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
|
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
|
||||||
|
|
||||||
|
exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = `
|
||||||
|
<DocumentFragment>
|
||||||
|
<span
|
||||||
|
class="mx_EventTile_body mx_EventTile_bigEmoji"
|
||||||
|
dir="auto"
|
||||||
|
>
|
||||||
|
<span
|
||||||
|
class="mx_Emoji"
|
||||||
|
title=":man-woman-girl-boy:"
|
||||||
|
>
|
||||||
|
👨👩👧👦
|
||||||
|
</span>
|
||||||
|
|
||||||
|
<span
|
||||||
|
class="mx_Emoji"
|
||||||
|
title=":left_right_arrow:"
|
||||||
|
>
|
||||||
|
↔️
|
||||||
|
</span>
|
||||||
|
|
||||||
|
<span
|
||||||
|
class="mx_Emoji"
|
||||||
|
title=":flag-is:"
|
||||||
|
>
|
||||||
|
🇮🇸
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</DocumentFragment>
|
||||||
|
`;
|
||||||
|
|
||||||
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
|
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
|
||||||
<DocumentFragment>
|
<DocumentFragment>
|
||||||
<span
|
<span
|
||||||
|
|
Loading…
Reference in a new issue