forgejo/services/repository/files/patch.go

199 lines
5.5 KiB
Go
Raw Normal View History

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package files
import (
"context"
"fmt"
"strings"
"code.gitea.io/gitea/models"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/structs"
asymkey_service "code.gitea.io/gitea/services/asymkey"
)
// ApplyDiffPatchOptions holds the repository diff patch update options
type ApplyDiffPatchOptions struct {
LastCommitID string
OldBranch string
NewBranch string
Message string
Content string
SHA string
Author *IdentityOptions
Committer *IdentityOptions
Dates *CommitDateOptions
Signoff bool
}
// Validate validates the provided options
func (opts *ApplyDiffPatchOptions) Validate(ctx context.Context, repo *repo_model.Repository, doer *user_model.User) error {
// If no branch name is set, assume master
if opts.OldBranch == "" {
opts.OldBranch = repo.DefaultBranch
}
if opts.NewBranch == "" {
opts.NewBranch = opts.OldBranch
}
gitRepo, closer, err := git.RepositoryFromContextOrOpen(ctx, repo.RepoPath())
if err != nil {
return err
}
defer closer.Close()
// oldBranch must exist for this operation
if _, err := gitRepo.GetBranch(opts.OldBranch); err != nil {
return err
}
// A NewBranch can be specified for the patch to be applied to.
// Check to make sure the branch does not already exist, otherwise we can't proceed.
// If we aren't branching to a new branch, make sure user can commit to the given branch
if opts.NewBranch != opts.OldBranch {
existingBranch, err := gitRepo.GetBranch(opts.NewBranch)
if existingBranch != nil {
return git_model.ErrBranchAlreadyExists{
BranchName: opts.NewBranch,
}
}
if err != nil && !git.IsErrBranchNotExist(err) {
return err
}
} else {
protectedBranch, err := git_model.GetFirstMatchProtectedBranchRule(ctx, repo.ID, opts.OldBranch)
if err != nil {
return err
}
if protectedBranch != nil {
protectedBranch.Repo = repo
if !protectedBranch.CanUserPush(ctx, doer) {
return models.ErrUserCannotCommit{
UserName: doer.LowerName,
}
}
}
if protectedBranch != nil && protectedBranch.RequireSignedCommits {
_, _, _, err := asymkey_service.SignCRUDAction(ctx, repo.RepoPath(), doer, repo.RepoPath(), opts.OldBranch)
if err != nil {
if !asymkey_service.IsErrWontSign(err) {
return err
}
return models.ErrUserCannotCommit{
UserName: doer.LowerName,
}
}
}
}
return nil
}
// ApplyDiffPatch applies a patch to the given repository
func ApplyDiffPatch(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *ApplyDiffPatchOptions) (*structs.FileResponse, error) {
err := repo.MustNotBeArchived()
if err != nil {
return nil, err
}
if err := opts.Validate(ctx, repo, doer); err != nil {
return nil, err
}
message := strings.TrimSpace(opts.Message)
author, committer := GetAuthorAndCommitterUsers(opts.Author, opts.Committer, doer)
t, err := NewTemporaryUploadRepository(ctx, repo)
if err != nil {
log.Error("%v", err)
}
defer t.Close()
Fix reverting a merge commit failing (#28794) Fixes #22236 --- Error occurring currently while trying to revert commit using read-tree -m approach: > 2022/12/26 16:04:43 ...rvices/pull/patch.go:240:AttemptThreeWayMerge() [E] [63a9c61a] Unable to run read-tree -m! Error: exit status 128 - fatal: this operation must be run in a work tree > - fatal: this operation must be run in a work tree We need to clone a non-bare repository for `git read-tree -m` to work. https://github.com/go-gitea/gitea/commit/bb371aee6ecf5e570cdf7b5f7f0d6f47a607a325 adds support to create a non-bare cloned temporary upload repository. After cloning a non-bare temporary upload repository, we [set default index](https://github.com/go-gitea/gitea/blob/main/services/repository/files/cherry_pick.go#L37) (`git read-tree HEAD`). This operation ends up resetting the git index file (see investigation details below), due to which, we need to call `git update-index --refresh` afterward. Here's the diff of the index file before and after we execute SetDefaultIndex: https://www.diffchecker.com/hyOP3eJy/ Notice the **ctime**, **mtime** are set to 0 after SetDefaultIndex. You can reproduce the same behavior using these steps: ```bash $ git clone https://try.gitea.io/me-heer/test.git -s -b main $ cd test $ git read-tree HEAD $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 error: Entry '1' not uptodate. Cannot merge. ``` After which, we can fix like this: ``` $ git update-index --refresh $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 ```
2024-01-16 18:06:51 +03:00
if err := t.Clone(opts.OldBranch, true); err != nil {
return nil, err
}
if err := t.SetDefaultIndex(); err != nil {
return nil, err
}
// Get the commit of the original branch
commit, err := t.GetBranchCommit(opts.OldBranch)
if err != nil {
return nil, err // Couldn't get a commit for the branch
}
// Assigned LastCommitID in opts if it hasn't been set
if opts.LastCommitID == "" {
opts.LastCommitID = commit.ID.String()
} else {
lastCommitID, err := t.gitRepo.ConvertToGitID(opts.LastCommitID)
if err != nil {
return nil, fmt.Errorf("ApplyPatch: Invalid last commit ID: %w", err)
}
opts.LastCommitID = lastCommitID.String()
if commit.ID.String() != opts.LastCommitID {
return nil, models.ErrCommitIDDoesNotMatch{
GivenCommitID: opts.LastCommitID,
CurrentCommitID: opts.LastCommitID,
}
}
}
stdout := &strings.Builder{}
stderr := &strings.Builder{}
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
cmdApply := git.NewCommand(ctx, "apply", "--index", "--recount", "--cached", "--ignore-whitespace", "--whitespace=fix", "--binary")
if git.CheckGitVersionAtLeast("2.32") == nil {
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
cmdApply.AddArguments("-3")
}
Refactor git command package to improve security and maintainability (#22678) This PR follows #21535 (and replace #22592) ## Review without space diff https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1 ## Purpose of this PR 1. Make git module command completely safe (risky user inputs won't be passed as argument option anymore) 2. Avoid low-level mistakes like https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918 3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg` type 4. Simplify code when using git command ## The main idea of this PR * Move the `git.CmdArg` to the `internal` package, then no other package except `git` could use it. Then developers could never do `AddArguments(git.CmdArg(userInput))` any more. * Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already trusted arguments. It's only used in a few cases, for example: use git arguments from config file, help unit test with some arguments. * Introduce `AddOptionValues` and `AddOptionFormat`, they make code more clear and simple: * Before: `AddArguments("-m").AddDynamicArguments(message)` * After: `AddOptionValues("-m", message)` * - * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))` * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)` ## FAQ ### Why these changes were not done in #21535 ? #21535 is mainly a search&replace, it did its best to not change too much logic. Making the framework better needs a lot of changes, so this separate PR is needed as the second step. ### The naming of `AddOptionXxx` According to git's manual, the `--xxx` part is called `option`. ### How can it guarantee that `internal.CmdArg` won't be not misused? Go's specification guarantees that. Trying to access other package's internal package causes compilation error. And, `golangci-lint` also denies the git/internal package. Only the `git/command.go` can use it carefully. ### There is still a `ToTrustedCmdArgs`, will it still allow developers to make mistakes and pass untrusted arguments? Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code will be very complex (see the changes for examples). Then developers and reviewers can know that something might be unreasonable. ### Why there was a `CmdArgCheck` and why it's removed? At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck` was introduced as a hacky patch. Now, almost all code could be written as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for `CmdArgCheck` anymore. ### Why many codes for `signArg == ""` is deleted? Because in the old code, `signArg` could never be empty string, it's either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just dead code. --------- Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
if err := cmdApply.Run(&git.RunOpts{
Dir: t.basePath,
Stdout: stdout,
Stderr: stderr,
Stdin: strings.NewReader(opts.Content),
}); err != nil {
return nil, fmt.Errorf("Error: Stdout: %s\nStderr: %s\nErr: %w", stdout.String(), stderr.String(), err)
}
// Now write the tree
treeHash, err := t.WriteTree()
if err != nil {
return nil, err
}
// Now commit the tree
var commitHash string
if opts.Dates != nil {
commitHash, err = t.CommitTreeWithDate("HEAD", author, committer, treeHash, message, opts.Signoff, opts.Dates.Author, opts.Dates.Committer)
} else {
commitHash, err = t.CommitTree("HEAD", author, committer, treeHash, message, opts.Signoff)
}
if err != nil {
return nil, err
}
// Then push this tree to NewBranch
if err := t.Push(doer, commitHash, opts.NewBranch); err != nil {
return nil, err
}
commit, err = t.GetCommit(commitHash)
if err != nil {
return nil, err
}
fileCommitResponse, _ := GetFileCommitResponse(repo, commit) // ok if fails, then will be nil
Add context cache as a request level cache (#22294) To avoid duplicated load of the same data in an HTTP request, we can set a context cache to do that. i.e. Some pages may load a user from a database with the same id in different areas on the same page. But the code is hidden in two different deep logic. How should we share the user? As a result of this PR, now if both entry functions accept `context.Context` as the first parameter and we just need to refactor `GetUserByID` to reuse the user from the context cache. Then it will not be loaded twice on an HTTP request. But of course, sometimes we would like to reload an object from the database, that's why `RemoveContextData` is also exposed. The core context cache is here. It defines a new context ```go type cacheContext struct { ctx context.Context data map[any]map[any]any lock sync.RWMutex } var cacheContextKey = struct{}{} func WithCacheContext(ctx context.Context) context.Context { return context.WithValue(ctx, cacheContextKey, &cacheContext{ ctx: ctx, data: make(map[any]map[any]any), }) } ``` Then you can use the below 4 methods to read/write/del the data within the same context. ```go func GetContextData(ctx context.Context, tp, key any) any func SetContextData(ctx context.Context, tp, key, value any) func RemoveContextData(ctx context.Context, tp, key any) func GetWithContextCache[T any](ctx context.Context, cacheGroupKey string, cacheTargetID any, f func() (T, error)) (T, error) ``` Then let's take a look at how `system.GetString` implement it. ```go func GetSetting(ctx context.Context, key string) (string, error) { return cache.GetWithContextCache(ctx, contextCacheKey, key, func() (string, error) { return cache.GetString(genSettingCacheKey(key), func() (string, error) { res, err := GetSettingNoCache(ctx, key) if err != nil { return "", err } return res.SettingValue, nil }) }) } ``` First, it will check if context data include the setting object with the key. If not, it will query from the global cache which may be memory or a Redis cache. If not, it will get the object from the database. In the end, if the object gets from the global cache or database, it will be set into the context cache. An object stored in the context cache will only be destroyed after the context disappeared.
2023-02-15 16:37:34 +03:00
verification := GetPayloadCommitVerification(ctx, commit)
fileResponse := &structs.FileResponse{
Commit: fileCommitResponse,
Verification: verification,
}
return fileResponse, nil
}