Switch from graphemer to Intl.Segmenter (#12697)

Signed-off-by: Michael Telatynski <7t3chguy@gmail.com>
This commit is contained in:
Michael Telatynski 2024-06-26 10:34:07 +01:00 committed by GitHub
parent 95c8aa3d18
commit 86a95cfff7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 15 additions and 19 deletions

View file

@ -96,7 +96,6 @@
"filesize": "10.1.2",
"github-markdown-css": "^5.5.1",
"glob-to-regexp": "^0.4.1",
"graphemer": "^1.4.0",
"highlight.js": "^11.3.1",
"html-entities": "^2.0.0",
"is-ip": "^3.1.0",

View file

@ -26,7 +26,6 @@ import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
import { Optional } from "matrix-events-sdk";
import escapeHtml from "escape-html";
import GraphemeSplitter from "graphemer";
import { getEmojiFromUnicode } from "@matrix-org/emojibase-bindings";
import { IExtendedSanitizeOptions } from "./@types/sanitize-html";
@ -34,6 +33,7 @@ import SettingsStore from "./settings/SettingsStore";
import { stripHTMLReply, stripPlainReply } from "./utils/Reply";
import { PERMITTED_URL_SCHEMES } from "./utils/UrlUtils";
import { sanitizeHtmlParams, transformTags } from "./Linkify";
import { graphemeSegmenter } from "./utils/strings";
export { Linkify, linkifyElement, linkifyAndSanitizeHtml } from "./Linkify";
@ -265,17 +265,16 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
let text = "";
let key = 0;
const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(message)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const data of graphemeSegmenter.segment(message)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (text) {
result.push(text);
text = "";
}
result.push(emojiToSpan(char, key));
result.push(emojiToSpan(data.segment, key));
key++;
} else {
text += char;
text += data.segment;
}
}
if (text) {

View file

@ -17,7 +17,6 @@ limitations under the License.
import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
import GraphemeSplitter from "graphemer";
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
@ -25,7 +24,7 @@ import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
import SettingsStore from "../settings/SettingsStore";
import { getFirstGrapheme } from "../utils/strings";
import { getFirstGrapheme, graphemeSegmenter } from "../utils/strings";
const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);
@ -650,19 +649,18 @@ export class PartCreator {
const parts: (PlainPart | EmojiPart)[] = [];
let plainText = "";
const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(text)) {
if (EMOJIBASE_REGEX.test(char)) {
for (const data of graphemeSegmenter.segment(text)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";
}
parts.push(this.emoji(char));
parts.push(this.emoji(data.segment));
if (PartCreator.isRegionalIndicator(text)) {
parts.push(this.plain(REGIONAL_EMOJI_SEPARATOR));
}
} else {
plainText += char;
plainText += data.segment;
}
}
if (plainText) {

View file

@ -21,7 +21,6 @@ limitations under the License.
* @param text the plaintext to put in the user's clipboard
*/
import { logger } from "matrix-js-sdk/src/logger";
import GraphemeSplitter from "graphemer";
export async function copyPlaintext(text: string): Promise<boolean> {
try {
@ -85,6 +84,8 @@ export function getSelectedText(): string {
return window.getSelection()!.toString();
}
export const graphemeSegmenter = new Intl.Segmenter();
/**
* Returns the first grapheme in the given string.
* Especially useful for strings containing emoji, as it will not break compound emoji up.
@ -92,7 +93,6 @@ export function getSelectedText(): string {
* @returns the first grapheme or an empty string if given an empty string
*/
export function getFirstGrapheme(str: string): string {
const splitter = new GraphemeSplitter();
const result = splitter.iterateGraphemes(str).next();
return result.done ? "" : result.value;
const result = graphemeSegmenter.segment(str)[Symbol.iterator]().next();
return result.done ? "" : result.value.segment;
}

View file

@ -12,7 +12,7 @@
"outDir": "./lib",
"declaration": true,
"jsx": "react",
"lib": ["es2021", "dom", "dom.iterable"],
"lib": ["es2022", "dom", "dom.iterable"],
"strict": true
},
"include": [