It's time for MVP-ish language auto-detection

This commit is contained in:
Lim Chee Aun 2024-05-28 17:59:17 +08:00
parent d16221e296
commit 0a4aae51b7
5 changed files with 76 additions and 5 deletions

16
package-lock.json generated
View file

@ -38,6 +38,7 @@
"react-router-dom": "6.6.2",
"string-length": "6.0.0",
"swiped-events": "~1.2.0",
"tinyld": "~1.3.4",
"toastify-js": "~1.12.0",
"uid": "~2.0.2",
"use-debounce": "~10.0.0",
@ -8009,6 +8010,21 @@
"node": ">=10"
}
},
"node_modules/tinyld": {
"version": "1.3.4",
"resolved": "https://registry.npmjs.org/tinyld/-/tinyld-1.3.4.tgz",
"integrity": "sha512-u26CNoaInA4XpDU+8s/6Cq8xHc2T5M4fXB3ICfXPokUQoLzmPgSZU02TAkFwFMJCWTjk53gtkS8pETTreZwCqw==",
"bin": {
"tinyld": "bin/tinyld.js",
"tinyld-heavy": "bin/tinyld-heavy.js",
"tinyld-light": "bin/tinyld-light.js"
},
"engines": {
"node": ">= 12.10.0",
"npm": ">= 6.12.0",
"yarn": ">= 1.20.0"
}
},
"node_modules/to-fast-properties": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz",

View file

@ -40,6 +40,7 @@
"react-router-dom": "6.6.2",
"string-length": "6.0.0",
"swiped-events": "~1.2.0",
"tinyld": "~1.3.4",
"toastify-js": "~1.12.0",
"uid": "~2.0.2",
"use-debounce": "~10.0.0",

View file

@ -15,6 +15,7 @@ import {
} from 'preact/hooks';
import { useHotkeys } from 'react-hotkeys-hook';
import stringLength from 'string-length';
import { detectAll } from 'tinyld/light';
import { uid } from 'uid/single';
import { useDebouncedCallback, useThrottledCallback } from 'use-debounce';
import { useSnapshot } from 'valtio';
@ -635,6 +636,7 @@ function Compose({
const [showEmoji2Picker, setShowEmoji2Picker] = useState(false);
const [showGIFPicker, setShowGIFPicker] = useState(false);
const [autoDetectedLanguages, setAutoDetectedLanguages] = useState(null);
const [topSupportedLanguages, restSupportedLanguages] = useMemo(() => {
const topLanguages = [];
const restLanguages = [];
@ -645,7 +647,8 @@ function Compose({
code === language ||
code === prevLanguage.current ||
code === DEFAULT_LANG ||
contentTranslationHideLanguages.includes(code)
contentTranslationHideLanguages.includes(code) ||
(autoDetectedLanguages?.length && autoDetectedLanguages.includes(code))
) {
topLanguages.push(l);
} else {
@ -661,7 +664,7 @@ function Compose({
commonA.localeCompare(commonB),
);
return [topLanguages, restLanguages];
}, [language]);
}, [language, autoDetectedLanguages]);
const replyToStatusMonthsAgo = useMemo(
() =>
@ -1172,6 +1175,11 @@ function Compose({
setShowMentionPicker({
defaultSearchTerm: action?.defaultSearchTerm || null,
});
} else if (
action?.name === 'auto-detect-language' &&
action?.languages
) {
setAutoDetectedLanguages(action.languages);
}
}}
/>
@ -1354,7 +1362,11 @@ function Compose({
)}
<label
class={`toolbar-button ${
language !== prevLanguage.current ? 'highlight' : ''
language !== prevLanguage.current ||
(autoDetectedLanguages?.length &&
autoDetectedLanguages.includes(language))
? 'highlight'
: ''
}`}
>
<span class="icon-text">
@ -1577,6 +1589,15 @@ const getCustomEmojis = pmem(_getCustomEmojis, {
maxAge: 30 * 60 * 1000, // 30 minutes
});
/**
 * Run tinyld's `detectAll` over `text` and keep only the language codes.
 *
 * @param {string} text - Text to analyse.
 * @returns {string[] | null} The `lang` field of at most the first two
 *   entries returned by `detectAll`, or `null` when it returns nothing
 *   (nullish or empty result).
 */
const detectLangs = (text) => {
  const detections = detectAll(text);
  if (!detections?.length) {
    return null;
  }
  // Cap the suggestions at two entries, in the order detectAll produced them
  // (presumably best match first — confirm against tinyld docs).
  const firstTwo = detections.slice(0, 2);
  return firstTwo.map(({ lang }) => lang);
};
const Textarea = forwardRef((props, ref) => {
const { masto, instance } = api();
const [text, setText] = useState(ref.current?.value || '');
@ -1845,6 +1866,17 @@ const Textarea = forwardRef((props, ref) => {
// Newline to prevent multiple line breaks at the end from being collapsed, no idea why
}, 500);
// Debounced language detection for the textarea content (wait argument: 1000).
// Does nothing for empty text or when no language is detected; otherwise it
// reports the detected codes through `onTrigger` as an
// 'auto-detect-language' action.
const debouncedAutoDetectLanguage = useDebouncedCallback((text) => {
  const detected = text ? detectLangs(text) : null;
  if (!detected?.length) return;
  onTrigger?.({ name: 'auto-detect-language', languages: detected });
}, 1000);
return (
<text-expander
ref={textExpanderRef}
@ -1912,6 +1944,7 @@ const Textarea = forwardRef((props, ref) => {
autoResizeTextarea(target);
props.onInput?.(e);
throttleHighlightText(text);
debouncedAutoDetectLanguage(text);
}}
style={{
width: '100%',

View file

@ -23,6 +23,7 @@ import {
} from 'preact/hooks';
import punycode from 'punycode';
import { useHotkeys } from 'react-hotkeys-hook';
import { detectAll } from 'tinyld/light';
import { useLongPress } from 'use-long-press';
import { useSnapshot } from 'valtio';
@ -46,6 +47,7 @@ import handleContentLinks from '../utils/handle-content-links';
import htmlContentLength from '../utils/html-content-length';
import isMastodonLinkMaybe from '../utils/isMastodonLinkMaybe';
import localeMatch from '../utils/locale-match';
import mem from '../utils/mem';
import niceDateTime from '../utils/nice-date-time';
import openCompose from '../utils/open-compose';
import pmem from '../utils/pmem';
@ -158,6 +160,18 @@ const SIZE_CLASS = {
l: 'large',
};
/**
 * Best-guess language code for `text`, wrapped in `mem`.
 *
 * Only the first entry returned by `detectAll` is considered, and it is
 * accepted only when its `accuracy` is strictly greater than 0.5.
 *
 * @param {string} text - Plain text to analyse.
 * @returns {string | null} The first entry's `lang`, or `null` when there is
 *   no entry, no `lang`, or the accuracy is 0.5 or lower.
 */
const detectLang = mem((text) => {
  const top = detectAll(text)[0];
  // The 50% cut-off is a cautious heuristic: guesses below it may still be
  // right, and guesses above it may still be wrong.
  const isConfident = top?.lang && top?.accuracy > 0.5;
  return isConfident ? top.lang : null;
});
function Status({
statusID,
status,
@ -242,7 +256,7 @@ function Status({
sensitive,
spoilerText,
visibility, // public, unlisted, private, direct
language,
language: _language,
editedAt,
filtered,
card,
@ -265,6 +279,9 @@ function Status({
emojiReactions,
} = status;
let languageAutoDetected = content && detectLang(getHTMLText(content));
const language = _language || languageAutoDetected;
// if (!mediaAttachments?.length) mediaFirst = false;
const hasMediaAttachments = !!mediaAttachments?.length;
if (mediaFirst && hasMediaAttachments) size = 's';
@ -1898,6 +1915,7 @@ function Status({
forceTranslate={forceTranslate || inlineTranslate}
mini={!isSizeLarge && !withinContext}
sourceLanguage={language}
autoDetected={languageAutoDetected}
text={getPostText(status)}
/>
)}

View file

@ -77,6 +77,7 @@ function TranslationBlock({
onTranslate,
text = '',
mini,
autoDetected,
}) {
const targetLang = getTranslateTargetLanguage(true);
const [uiState, setUIState] = useState('default');
@ -187,7 +188,9 @@ function TranslationBlock({
{uiState === 'loading'
? 'Translating…'
: sourceLanguage && sourceLangText && !detectedLang
? `Translate from ${sourceLangText}`
? autoDetected
? `Translate from ${sourceLangText} (auto-detected)`
: `Translate from ${sourceLangText}`
: `Translate`}
</span>
</button>