From dfe3ffc581eb2fb2137aa9f1530d1296bc6a2801 Mon Sep 17 00:00:00 2001 From: Gusted Date: Wed, 30 Oct 2024 15:59:48 +0000 Subject: [PATCH] feat: harden localization against malicious HTML (#5703) - Add a new script that proccess the localization files and verify that they only contain HTML according to our strictly defined rules. - This should make adding malicious HTML near-impossible. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/5703 Reviewed-by: 0ko <0ko@noreply.codeberg.org> Co-authored-by: Gusted Co-committed-by: Gusted --- Makefile | 6 +- build/lint-locale.go | 156 ++++++++++++++++++ build/lint-locale_test.go | 65 ++++++++ options/locale/locale_ar.ini | 6 +- options/locale/locale_bg.ini | 6 +- options/locale/locale_cs-CZ.ini | 10 +- options/locale/locale_de-DE.ini | 10 +- options/locale/locale_el-GR.ini | 10 +- options/locale/locale_en-US.ini | 10 +- options/locale/locale_eo.ini | 1 - options/locale/locale_es-ES.ini | 10 +- options/locale/locale_fa-IR.ini | 7 +- options/locale/locale_fi-FI.ini | 6 +- options/locale/locale_fil.ini | 10 +- options/locale/locale_fr-FR.ini | 12 +- options/locale/locale_gl.ini | 3 +- options/locale/locale_hu-HU.ini | 5 +- options/locale/locale_id-ID.ini | 5 +- options/locale/locale_is-IS.ini | 3 +- options/locale/locale_it-IT.ini | 11 +- options/locale/locale_ja-JP.ini | 11 +- options/locale/locale_ko-KR.ini | 5 +- options/locale/locale_lv-LV.ini | 9 +- options/locale/locale_nds.ini | 10 +- options/locale/locale_nl-NL.ini | 10 +- options/locale/locale_pl-PL.ini | 4 +- options/locale/locale_pt-BR.ini | 11 +- options/locale/locale_pt-PT.ini | 10 +- options/locale/locale_ru-RU.ini | 10 +- options/locale/locale_si-LK.ini | 21 ++- options/locale/locale_sk-SK.ini | 3 +- options/locale/locale_sl.ini | 2 +- options/locale/locale_sr-SP.ini | 2 +- options/locale/locale_sv-SE.ini | 4 +- options/locale/locale_tr-TR.ini | 9 +- options/locale/locale_uk-UA.ini | 6 +- options/locale/locale_zh-CN.ini | 10 +- options/locale/locale_zh-HK.ini | 2 +- options/locale/locale_zh-TW.ini | 11 +- templates/repo/editor/commit_form.tmpl | 2 +- .../repo/issue/view_content/comments.tmpl | 2 +- templates/repo/issue/view_title.tmpl | 4 +- templates/user/settings/applications.tmpl | 2 +- 43 files changed, 361 insertions(+), 151 deletions(-) create mode 100644 build/lint-locale.go create mode 100644 build/lint-locale_test.go diff --git a/Makefile b/Makefile index 62cf4e536d..f9c02cfbf3 100644 --- a/Makefile +++ b/Makefile @@ -418,7 +418,7 @@ lint-frontend: lint-js lint-css lint-frontend-fix: lint-js-fix lint-css-fix .PHONY: lint-backend -lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate +lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate lint-locale .PHONY: lint-backend-fix lint-backend-fix: lint-go-fix lint-go-vet lint-editorconfig @@ -461,6 +461,10 @@ lint-renovate: node_modules @if grep --quiet --extended-regexp -e '^( WARN:|ERROR:)' .lint-renovate ; then cat .lint-renovate ; rm .lint-renovate ; exit 1 ; fi @rm .lint-renovate +.PHONY: lint-locale +lint-locale: + $(GO) run build/lint-locale.go + .PHONY: lint-md lint-md: node_modules npx markdownlint docs *.md diff --git a/build/lint-locale.go b/build/lint-locale.go new file mode 100644 index 0000000000..d403eaa70d --- /dev/null +++ b/build/lint-locale.go @@ -0,0 +1,156 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +//nolint:forbidigo +package main + +import ( + "fmt" + "html" + "io/fs" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + + "github.com/microcosm-cc/bluemonday" + "github.com/sergi/go-diff/diffmatchpatch" + "gopkg.in/ini.v1" //nolint:depguard +) + +var ( + policy *bluemonday.Policy + tagRemover *strings.Replacer + safeURL = "https://TO-BE-REPLACED.COM" + + // Matches href="", href="#", href="%s", href="#%s", href="%[1]s" and href="#%[1]s". + placeHolderRegex = regexp.MustCompile(`href="#?(%s|%\[\d\]s)?"`) +) + +func initBlueMondayPolicy() { + policy = bluemonday.NewPolicy() + + policy.RequireParseableURLs(true) + policy.AllowURLSchemes("https") + + // Only allow safe URL on href. + // Only allow target="_blank". + // Only allow rel="nopener noreferrer", rel="noopener" and rel="noreferrer". + // Only allow placeholder on id and class. + policy.AllowAttrs("href").Matching(regexp.MustCompile("^" + regexp.QuoteMeta(safeURL) + "$")).OnElements("a") + policy.AllowAttrs("target").Matching(regexp.MustCompile("^_blank$")).OnElements("a") + policy.AllowAttrs("rel").Matching(regexp.MustCompile("^(noopener|noreferrer|noopener noreferrer)$")).OnElements("a") + policy.AllowAttrs("id", "class").Matching(regexp.MustCompile(`^%s|%\[\d\]s$`)).OnElements("a") + + // Only allow positional placeholder as class. + positionalPlaceholderRe := regexp.MustCompile(`^%\[\d\]s$`) + policy.AllowAttrs("class").Matching(positionalPlaceholderRe).OnElements("strong") + policy.AllowAttrs("id").Matching(positionalPlaceholderRe).OnElements("code") + + // Allowed elements with no attributes. Must be a recognized tagname. + policy.AllowElements("strong", "br", "b", "strike", "code", "i") + + // TODO: Remove in `actions.workflow.dispatch.trigger_found`. + policy.AllowNoAttrs().OnElements("c") +} + +func initRemoveTags() { + oldnew := []string{} + for _, el := range []string{ + "email@example.com", "correu@example.com", "epasts@domens.lv", "email@exemplo.com", "eposta@ornek.com", "email@példa.hu", "email@esempio.it", + "user", "utente", "lietotājs", "gebruiker", "usuário", "Benutzer", "Bruker", + "server", "servidor", "kiszolgáló", "serveris", + "label", "etichetta", "etiķete", "rótulo", "Label", "utilizador", + "filename", "bestandsnaam", "dosyaadi", "fails", "nome do arquivo", + } { + oldnew = append(oldnew, "<"+el+">", "REPLACED-TAG") + } + + tagRemover = strings.NewReplacer(oldnew...) +} + +func preprocessTranslationValue(value string) string { + // href should be a parsable URL, replace placeholder strings with a safe url. + value = placeHolderRegex.ReplaceAllString(value, `href="`+safeURL+`"`) + + // Remove tags that aren't tags but will be parsed as tags. We already know they are safe and sound. + value = tagRemover.Replace(value) + + return value +} + +func checkLocaleContent(localeContent []byte) []string { + // Same configuration as Forgejo uses. + cfg := ini.Empty(ini.LoadOptions{ + IgnoreContinuation: true, + }) + cfg.NameMapper = ini.SnackCase + + if err := cfg.Append(localeContent); err != nil { + panic(err) + } + + dmp := diffmatchpatch.New() + errors := []string{} + + for _, section := range cfg.Sections() { + for _, key := range section.Keys() { + var trKey string + if section.Name() == "" || section.Name() == "DEFAULT" || section.Name() == "common" { + trKey = key.Name() + } else { + trKey = section.Name() + "." + key.Name() + } + + keyValue := preprocessTranslationValue(key.Value()) + + if html.UnescapeString(policy.Sanitize(keyValue)) != keyValue { + // Create a nice diff of the difference. + diffs := dmp.DiffMain(keyValue, html.UnescapeString(policy.Sanitize(keyValue)), false) + diffs = dmp.DiffCleanupSemantic(diffs) + diffs = dmp.DiffCleanupEfficiency(diffs) + + errors = append(errors, trKey+": "+dmp.DiffPrettyText(diffs)) + } + } + } + return errors +} + +func main() { + initBlueMondayPolicy() + initRemoveTags() + + localeDir := filepath.Join("options", "locale") + localeFiles, err := os.ReadDir(localeDir) + if err != nil { + panic(err) + } + + if !slices.ContainsFunc(localeFiles, func(e fs.DirEntry) bool { return strings.HasSuffix(e.Name(), ".ini") }) { + fmt.Println("No locale files found") + os.Exit(1) + } + + exitCode := 0 + for _, localeFile := range localeFiles { + if !strings.HasSuffix(localeFile.Name(), ".ini") { + continue + } + + localeContent, err := os.ReadFile(filepath.Join(localeDir, localeFile.Name())) + if err != nil { + panic(err) + } + + if err := checkLocaleContent(localeContent); len(err) > 0 { + fmt.Println(localeFile.Name()) + fmt.Println(strings.Join(err, "\n")) + fmt.Println() + exitCode = 1 + } + } + + os.Exit(exitCode) +} diff --git a/build/lint-locale_test.go b/build/lint-locale_test.go new file mode 100644 index 0000000000..b33dc9af2b --- /dev/null +++ b/build/lint-locale_test.go @@ -0,0 +1,65 @@ +// Copyright 2024 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLocalizationPolicy(t *testing.T) { + initBlueMondayPolicy() + initRemoveTags() + + t.Run("Remove tags", func(t *testing.T) { + assert.Empty(t, checkLocaleContent([]byte(`hidden_comment_types_description = Comment types checked here will not be shown inside issue pages. Checking "Label" for example removes all " added/removed