2023-09-29 11:39:56 +03:00
|
|
|
// GoToSocial
|
|
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package text
|
|
|
|
|
|
|
|
import "unicode"
|
|
|
|
|
|
|
|
func isPlausiblyInHashtag(r rune) bool {
|
|
|
|
// Marks are allowed during parsing
|
|
|
|
// prior to normalization, but not after,
|
|
|
|
// since they may be combined into letters
|
|
|
|
// during normalization.
|
|
|
|
return unicode.IsMark(r) ||
|
|
|
|
isPermittedInHashtag(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
// isPermittedInHashtag returns true if rune r
// is a letter, a number, or an underscore.
func isPermittedInHashtag(r rune) bool {
	switch {
	// Underscore is the only
	// non-alphanumeric rune allowed.
	case r == '_':
		return true

	// Any Unicode letter.
	case unicode.IsLetter(r):
		return true

	// Otherwise only a
	// Unicode number will do.
	default:
		return unicode.IsNumber(r)
	}
}
|
|
|
|
|
|
|
|
// isHashtagBoundary reports whether rune r
// counts as a break character that may sit
// directly before or after a #hashtag.
func isHashtagBoundary(r rune) bool {
	switch r {
	// Zero width space, zero width no-break
	// space, and the pipe character (which is
	// sometimes used as a workaround) are
	// always treated as boundaries.
	case '\u200B', '\uFEFF', '|':
		return true

	// Underscore is punctuation as far as
	// Unicode is concerned, but it is never
	// treated as a hashtag boundary.
	case '_':
		return false
	}

	// Everything else is a boundary only
	// if it's standard Unicode white space
	// or punctuation.
	return unicode.IsSpace(r) || unicode.IsPunct(r)
}
|
|
|
|
|
|
|
|
// isMentionBoundary reports whether rune r
// counts as a break character that may sit
// directly before or after a @mention.
func isMentionBoundary(r rune) bool {
	// Standard Unicode white space.
	if unicode.IsSpace(r) {
		return true
	}

	// Otherwise, any Unicode punctuation
	// (underscore included).
	return unicode.IsPunct(r)
}
|