forgejo/modules/util/sanitize.go

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package util

import (
	"bytes"
	"unicode"
)

// sanitizedError decorates an error so that the message it reports is first
// passed through SanitizeCredentialURLs. The decorated error itself is kept
// untouched and can be retrieved with Unwrap.
type sanitizedError struct {
	err error
}

// Error returns the message of the wrapped error after SanitizeCredentialURLs
// has been applied to it.
func (e sanitizedError) Error() string {
	rawMsg := e.err.Error()
	return SanitizeCredentialURLs(rawMsg)
}

// Unwrap returns the wrapped error as it was given, without any sanitizing.
func (e sanitizedError) Unwrap() error {
	return e.err
}

// SanitizeErrorCredentialURLs wraps err so that the message returned by its
// Error method has been passed through SanitizeCredentialURLs and therefore
// doesn't contain sensitive credentials in URLs. The original error remains
// reachable through the wrapper's Unwrap method.
//
// A nil err is returned as nil: wrapping it would yield a non-nil error value
// whose Error method panics when it dereferences the missing inner error.
func SanitizeErrorCredentialURLs(err error) error {
	if err == nil {
		return nil
	}
	return sanitizedError{err: err}
}

// userPlaceholder is the text SanitizeCredentialURLs writes in place of the
// userinfo part ("user:pass") of a URL.
const userPlaceholder = "sanitized-credential"

// schemeSep is the byte sequence SanitizeCredentialURLs searches for to locate
// candidate URLs: the "://" that follows a URL scheme.
var schemeSep = []byte("://")

// SanitizeCredentialURLs removes the credentials from all URLs (starting with "scheme://") in the input string:
// "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
//
// A "://" only counts as the start of a URL when a scheme precedes it. Following RFC 3986 section 3.1 the
// scheme may contain digits, '+', '-' and '.' after a letter, so URLs such as "pop3://user:pass@host" are
// sanitized as well. Text without an explicit scheme (e.g. "//u:p@h") is left untouched.
// The input string itself is returned when it has no "://" or no '@' after the first "://".
func SanitizeCredentialURLs(s string) string {
	bs := UnsafeStringToBytes(s)
	schemeSepPos := bytes.Index(bs, schemeSep)
	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
		return s // fast return if there is no URL scheme or no userinfo
	}
	out := make([]byte, 0, len(bs)+len(userPlaceholder))
	for schemeSepPos != -1 {
		// position of the ':' in "://", i.e. the end of the possible scheme
		schemeEndPos := schemeSepPos
		// skip the "://"
		schemeSepPos += len(schemeSep)
		// the possible '@' position: "https://foo@[^here]host"
		sepAtPos := -1
		// the possible end position: "The https://host[^here] in log for test"
		sepEndPos := schemeSepPos
	sepLoop:
		for ; sepEndPos < len(bs); sepEndPos++ {
			c := bs[sepEndPos]
			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
				continue
			}
			switch c {
			case '@':
				// keep the last '@' seen: everything before it is treated as userinfo
				sepAtPos = sepEndPos
			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
			default:
				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
			}
		}
		// If there is '@' after a non-empty userinfo, and the string is like "s://u@h", then hide the "u" part.
		// sepEndPos is always past sepAtPos here, because the scan advances after recording an '@'.
		if sepAtPos > schemeSepPos && hasURLSchemeSuffix(bs[:schemeEndPos]) {
			out = append(out, bs[:schemeSepPos]...)
			out = append(out, userPlaceholder...)
			out = append(out, bs[sepAtPos:sepEndPos]...)
		} else {
			out = append(out, bs[:sepEndPos]...)
		}
		bs = bs[sepEndPos:]
		schemeSepPos = bytes.Index(bs, schemeSep)
	}
	out = append(out, bs...)
	return UnsafeBytesToString(out)
}

// hasURLSchemeSuffix reports whether prefix (the text right before a "://") ends with something that looks like
// a URL scheme: a letter, optionally followed by digits, '+', '-' or '.'.
// Bytes are classified one at a time with unicode.IsLetter.
func hasURLSchemeSuffix(prefix []byte) bool {
	for i := len(prefix) - 1; i >= 0; i-- {
		c := prefix[i]
		if unicode.IsLetter(rune(c)) {
			return true
		}
		// only the non-letter scheme characters may sit between the letter and the "://"
		isSchemeTail := ('0' <= c && c <= '9') || c == '+' || c == '-' || c == '.'
		if !isSchemeTail {
			return false
		}
	}
	return false
}
Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00			`// Copyright 2021 The Gitea Authors. All rights reserved.`
Implement FSFE REUSE for golang files (#21840) Change all license headers to comply with REUSE specification. Fix #16132 Co-authored-by: flynnnnnnnnnn <flynnnnnnnnnn@github> Co-authored-by: John Olheiser <john.olheiser@gmail.com> 2022-11-27 21:20:29 +03:00			`// SPDX-License-Identifier: MIT`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00
			`package util`

			`import (`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`"bytes"`
			`"unicode"`
format with gofumpt (#18184) * gofumpt -w -l . * gofumpt -w -l -extra . * Add linter * manual fix * change make fmt 2022-01-20 20:46:10 +03:00			`)`
Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00
			`type sanitizedError struct {`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`err error`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`

Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00			`func (err sanitizedError) Error() string {`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`return SanitizeCredentialURLs(err.err.Error())`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`

Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`func (err sanitizedError) Unwrap() error {`
			`return err.err`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`

Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs`
			`func SanitizeErrorCredentialURLs(err error) error {`
			`return sanitizedError{err: err}`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`

Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`const userPlaceholder = "sanitized-credential"`
Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`var schemeSep = []byte("://")`
Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"`
			`func SanitizeCredentialURLs(s string) string {`
Refactor to use UnsafeStringToBytes (#31358) The PR replaces all `goldmark/util.BytesToReadOnlyString` with `util.UnsafeBytesToString`, `goldmark/util.StringToReadOnlyBytes` with `util.UnsafeStringToBytes`. This removes one `TODO`. Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> (cherry picked from commit 1761459ebc7eb6d432eced093b4583425a5c5d4b) 2024-06-14 04:26:33 +03:00			`bs := UnsafeStringToBytes(s)`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`schemeSepPos := bytes.Index(bs, schemeSep)`
			`if schemeSepPos == -1 \|\| bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {`
			`return s // fast return if there is no URL scheme or no userinfo`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`out := make([]byte, 0, len(bs)+len(userPlaceholder))`
			`for schemeSepPos != -1 {`
			`schemeSepPos += 3 // skip the "://"`
			`sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"`
			`sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"`
			`sepLoop:`
			`for ; sepEndPos < len(bs); sepEndPos++ {`
			`c := bs[sepEndPos]`
			`if ('A' <= c && c <= 'Z') \|\| ('a' <= c && c <= 'z') \|\| ('0' <= c && c <= '9') {`
			`continue`
			`}`
			`switch c {`
			`case '@':`
			`sepAtPos = sepEndPos`
			`case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':`
			`continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars`
			`default:`
			`break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop`
			`}`
			`}`
			`// if there is '@', and the string is like "s://u@h", then hide the "u" part`
			`if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {`
			`out = append(out, bs[:schemeSepPos]...)`
			`out = append(out, userPlaceholder...)`
			`out = append(out, bs[sepAtPos:sepEndPos]...)`
			`} else {`
			`out = append(out, bs[:sepEndPos]...)`
			`}`
			`bs = bs[sepEndPos:]`
			`schemeSepPos = bytes.Index(bs, schemeSep)`
Add push to remote mirror repository (#15157) * Added push mirror model. * Integrated push mirror into queue. * Moved methods into own file. * Added basic implementation. * Mirror wiki too. * Removed duplicated method. * Get url for different remotes. * Added migration. * Unified remote url access. * Add/Remove push mirror remotes. * Prevent hangs with missing credentials. * Moved code between files. * Changed sanitizer interface. * Added push mirror backend methods. * Only update the mirror remote. * Limit refs on push. * Added UI part. * Added missing table. * Delete mirror if repository gets removed. * Changed signature. Handle object errors. * Added upload method. * Added "upload" unit tests. * Added transfer adapter unit tests. * Send correct headers. * Added pushing of LFS objects. * Added more logging. * Simpler body handling. * Process files in batches to reduce HTTP calls. * Added created timestamp. * Fixed invalid column name. * Changed name to prevent xorm auto setting. * Remove table header im empty. * Strip exit code from error message. * Added docs page about mirroring. * Fixed date. * Fixed merge errors. * Moved test to integrations. * Added push mirror test. * Added test. 2021-06-14 20:20:43 +03:00			`}`
Use a more general (and faster) method to sanitize URLs with credentials (#19239) Use a more general method to sanitize URLs with credentials: Simple and intuitive / Faster / Remove all credentials in all URLs 2022-03-31 05:25:40 +03:00			`out = append(out, bs...)`
Refactor to use UnsafeStringToBytes (#31358) The PR replaces all `goldmark/util.BytesToReadOnlyString` with `util.UnsafeBytesToString`, `goldmark/util.StringToReadOnlyBytes` with `util.UnsafeStringToBytes`. This removes one `TODO`. Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> (cherry picked from commit 1761459ebc7eb6d432eced093b4583425a5c5d4b) 2024-06-14 04:26:33 +03:00			`return UnsafeBytesToString(out)`
Fix error message sanitiziation (#3082) 2017-12-04 04:48:03 +03:00			`}`