2022-05-07 20:05:52 +03:00
// Copyright 2021 Gitea. All rights reserved.
2022-11-27 21:20:29 +03:00
// SPDX-License-Identifier: MIT
2022-05-07 20:05:52 +03:00
package automerge
import (
"context"
"errors"
"fmt"
2022-05-08 16:46:34 +03:00
"code.gitea.io/gitea/models/db"
2022-06-13 12:37:59 +03:00
issues_model "code.gitea.io/gitea/models/issues"
2022-05-11 13:09:36 +03:00
access_model "code.gitea.io/gitea/models/perm/access"
2022-05-07 20:05:52 +03:00
pull_model "code.gitea.io/gitea/models/pull"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
Simplify how git repositories are opened (#28937)
## Purpose
This is a refactor toward building an abstraction over managing git
repositories.
Afterwards, it does not matter anymore if they are stored on the local
disk or somewhere remote.
## What this PR changes
We used `git.OpenRepository` everywhere previously.
Now, we should split them into two distinct functions:
Firstly, there are temporary repositories which do not change:
```go
git.OpenRepository(ctx, diskPath)
```
Gitea managed repositories having a record in the database in the
`repository` table are moved into the new package `gitrepo`:
```go
gitrepo.OpenRepository(ctx, repo_model.Repo)
```
Why is `repo_model.Repository` the second parameter instead of file
path?
Because then we can easily adapt our repository storage strategy.
The repositories can be stored locally, however, they could just as well
be stored on a remote server.
## Further changes in other PRs
- A Git Command wrapper on package `gitrepo` could be created. i.e.
`NewCommand(ctx, repo_model.Repository, commands...)`. `git.RunOpts{Dir:
repo.RepoPath()}`, the directory should be empty before invoking this
method and it can be filled in the function only. #28940
- Remove the `RepoPath()`/`WikiPath()` functions to reduce the
possibility of mistakes.
---------
Co-authored-by: delvh <dev.lh@web.de>
2024-01-27 23:09:51 +03:00
"code.gitea.io/gitea/modules/gitrepo"
2022-05-07 20:05:52 +03:00
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
2024-05-21 18:23:22 +03:00
notify_service "code.gitea.io/gitea/services/notify"
2022-05-07 20:05:52 +03:00
pull_service "code.gitea.io/gitea/services/pull"
2024-10-21 22:21:50 +03:00
repo_service "code.gitea.io/gitea/services/repository"
2024-10-23 22:31:52 +03:00
shared_automerge "code.gitea.io/gitea/services/shared/automerge"
2022-05-07 20:05:52 +03:00
)
// Init runs the task queue to that handles auto merges
func Init ( ) error {
2024-05-21 18:23:22 +03:00
notify_service . RegisterNotifier ( NewNotifier ( ) )
2024-10-23 22:31:52 +03:00
shared_automerge . PRAutoMergeQueue = queue . CreateUniqueQueue ( graceful . GetManager ( ) . ShutdownContext ( ) , "pr_auto_merge" , handler )
if shared_automerge . PRAutoMergeQueue == nil {
Improve queue and logger context (#24924)
Before there was a "graceful function": RunWithShutdownFns, it's mainly
for some modules which doesn't support context.
The old queue system doesn't work well with context, so the old queues
need it.
After the queue refactoring, the new queue works with context well, so,
use Golang context as much as possible, the `RunWithShutdownFns` could
be removed (replaced by RunWithCancel for context cancel mechanism), the
related code could be simplified.
This PR also fixes some legacy queue-init problems, eg:
* typo : archiver: "unable to create codes indexer queue" => "unable to
create repo-archive queue"
* no nil check for failed queues, which causes unfriendly panic
After this PR, many goroutines could have better display name:
![image](https://github.com/go-gitea/gitea/assets/2114189/701b2a9b-8065-4137-aeaa-0bda2b34604a)
![image](https://github.com/go-gitea/gitea/assets/2114189/f1d5f50f-0534-40f0-b0be-f2c9daa5fe92)
2023-05-26 10:31:55 +03:00
return fmt . Errorf ( "unable to create pr_auto_merge queue" )
2022-05-07 20:05:52 +03:00
}
2024-10-23 22:31:52 +03:00
go graceful . GetManager ( ) . RunWithCancel ( shared_automerge . PRAutoMergeQueue )
2022-05-07 20:05:52 +03:00
return nil
}
// handle passed PR IDs and test the PRs
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 14:49:59 +03:00
func handler ( items ... string ) [ ] string {
for _ , s := range items {
2022-05-07 20:05:52 +03:00
var id int64
var sha string
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 14:49:59 +03:00
if _ , err := fmt . Sscanf ( s , "%d_%s" , & id , & sha ) ; err != nil {
log . Error ( "could not parse data from pr_auto_merge queue (%v): %v" , s , err )
2022-05-07 20:05:52 +03:00
continue
}
2024-05-21 18:23:22 +03:00
handlePullRequestAutoMerge ( id , sha )
2022-05-07 20:05:52 +03:00
}
return nil
}
// ScheduleAutoMerge if schedule is false and no error, pull can be merged directly
2024-10-21 22:21:50 +03:00
func ScheduleAutoMerge ( ctx context . Context , doer * user_model . User , pull * issues_model . PullRequest , style repo_model . MergeStyle , message string , deleteBranch bool ) ( scheduled bool , err error ) {
2022-11-12 23:18:50 +03:00
err = db . WithTx ( ctx , func ( ctx context . Context ) error {
2024-10-21 22:21:50 +03:00
if err := pull_model . ScheduleAutoMerge ( ctx , doer , pull . ID , style , message , deleteBranch ) ; err != nil {
2022-05-08 16:46:34 +03:00
return err
}
scheduled = true
2022-05-07 20:05:52 +03:00
2022-06-13 12:37:59 +03:00
_ , err = issues_model . CreateAutoMergeComment ( ctx , issues_model . CommentTypePRScheduledToAutoMerge , pull , doer )
2022-05-08 16:46:34 +03:00
return err
2022-11-12 23:18:50 +03:00
} )
2022-06-20 13:02:49 +03:00
return scheduled , err
2022-05-08 16:46:34 +03:00
}
// RemoveScheduledAutoMerge cancels a previously scheduled pull request
2022-06-13 12:37:59 +03:00
func RemoveScheduledAutoMerge ( ctx context . Context , doer * user_model . User , pull * issues_model . PullRequest ) error {
2022-11-12 23:18:50 +03:00
return db . WithTx ( ctx , func ( ctx context . Context ) error {
2022-05-08 16:46:34 +03:00
if err := pull_model . DeleteScheduledAutoMerge ( ctx , pull . ID ) ; err != nil {
return err
}
2022-06-13 12:37:59 +03:00
_ , err := issues_model . CreateAutoMergeComment ( ctx , issues_model . CommentTypePRUnScheduledToAutoMerge , pull , doer )
2022-05-08 16:46:34 +03:00
return err
2022-11-12 23:18:50 +03:00
} )
2022-05-07 20:05:52 +03:00
}
2024-05-21 18:23:22 +03:00
// StartPRCheckAndAutoMergeBySHA start an automerge check and auto merge task for all pull requests of repository and SHA
func StartPRCheckAndAutoMergeBySHA ( ctx context . Context , sha string , repo * repo_model . Repository ) error {
2024-10-23 22:31:52 +03:00
return shared_automerge . StartPRCheckAndAutoMergeBySHA ( ctx , sha , repo )
2022-05-07 20:05:52 +03:00
}
2024-05-21 18:23:22 +03:00
// StartPRCheckAndAutoMerge start an automerge check and auto merge task for a pull request
func StartPRCheckAndAutoMerge ( ctx context . Context , pull * issues_model . PullRequest ) {
2024-10-23 22:31:52 +03:00
shared_automerge . StartPRCheckAndAutoMerge ( ctx , pull )
2022-05-07 20:05:52 +03:00
}
2024-05-21 18:23:22 +03:00
// handlePullRequestAutoMerge merge the pull request if all checks are successful
func handlePullRequestAutoMerge ( pullID int64 , sha string ) {
2022-05-07 20:05:52 +03:00
ctx , _ , finished := process . GetManager ( ) . AddContext ( graceful . GetManager ( ) . HammerContext ( ) ,
2023-02-04 02:11:48 +03:00
fmt . Sprintf ( "Handle AutoMerge of PR[%d] with sha[%s]" , pullID , sha ) )
2022-05-07 20:05:52 +03:00
defer finished ( )
2022-06-13 12:37:59 +03:00
pr , err := issues_model . GetPullRequestByID ( ctx , pullID )
2022-05-07 20:05:52 +03:00
if err != nil {
log . Error ( "GetPullRequestByID[%d]: %v" , pullID , err )
return
}
// Check if there is a scheduled pr in the db
exists , scheduledPRM , err := pull_model . GetScheduledMergeByPullID ( ctx , pr . ID )
if err != nil {
2023-02-04 02:11:48 +03:00
log . Error ( "%-v GetScheduledMergeByPullID: %v" , pr , err )
2022-05-07 20:05:52 +03:00
return
}
if ! exists {
return
}
2024-05-21 18:23:22 +03:00
if err = pr . LoadBaseRepo ( ctx ) ; err != nil {
log . Error ( "%-v LoadBaseRepo: %v" , pr , err )
return
}
// check the sha is the same as pull request head commit id
baseGitRepo , err := gitrepo . OpenRepository ( ctx , pr . BaseRepo )
if err != nil {
log . Error ( "OpenRepository: %v" , err )
return
}
defer baseGitRepo . Close ( )
headCommitID , err := baseGitRepo . GetRefCommitID ( pr . GetGitRefName ( ) )
if err != nil {
log . Error ( "GetRefCommitID: %v" , err )
return
}
if headCommitID != sha {
log . Warn ( "Head commit id of auto merge %-v does not match sha [%s], it may means the head branch has been updated. Just ignore this request because a new request expected in the queue" , pr , sha )
return
}
2022-05-07 20:05:52 +03:00
// Get all checks for this pr
// We get the latest sha commit hash again to handle the case where the check of a previous push
// did not succeed or was not finished yet.
2022-11-19 11:12:33 +03:00
if err = pr . LoadHeadRepo ( ctx ) ; err != nil {
2023-02-04 02:11:48 +03:00
log . Error ( "%-v LoadHeadRepo: %v" , pr , err )
2022-05-07 20:05:52 +03:00
return
}
2024-05-21 18:23:22 +03:00
var headGitRepo * git . Repository
if pr . BaseRepoID == pr . HeadRepoID {
headGitRepo = baseGitRepo
} else {
headGitRepo , err = gitrepo . OpenRepository ( ctx , pr . HeadRepo )
if err != nil {
log . Error ( "OpenRepository %-v: %v" , pr . HeadRepo , err )
return
}
defer headGitRepo . Close ( )
2022-05-07 20:05:52 +03:00
}
2024-08-20 09:17:21 +03:00
switch pr . Flow {
case issues_model . PullRequestFlowGithub :
headBranchExist := headGitRepo . IsBranchExist ( pr . HeadBranch )
if pr . HeadRepo == nil || ! headBranchExist {
log . Warn ( "Head branch of auto merge %-v does not exist [HeadRepoID: %d, Branch: %s]" , pr , pr . HeadRepoID , pr . HeadBranch )
return
}
case issues_model . PullRequestFlowAGit :
headBranchExist := git . IsReferenceExist ( ctx , baseGitRepo . Path , pr . GetGitRefName ( ) )
if ! headBranchExist {
log . Warn ( "Head branch of auto merge %-v does not exist [HeadRepoID: %d, Branch(Agit): %s]" , pr , pr . HeadRepoID , pr . HeadBranch )
return
}
default :
log . Error ( "wrong flow type %d" , pr . Flow )
2022-05-07 20:05:52 +03:00
return
}
// Check if all checks succeeded
pass , err := pull_service . IsPullCommitStatusPass ( ctx , pr )
if err != nil {
2023-02-04 02:11:48 +03:00
log . Error ( "%-v IsPullCommitStatusPass: %v" , pr , err )
2022-05-07 20:05:52 +03:00
return
}
if ! pass {
2023-02-04 02:11:48 +03:00
log . Info ( "Scheduled auto merge %-v has unsuccessful status checks" , pr )
2022-05-07 20:05:52 +03:00
return
}
// Merge if all checks succeeded
2022-12-03 05:48:26 +03:00
doer , err := user_model . GetUserByID ( ctx , scheduledPRM . DoerID )
2022-05-07 20:05:52 +03:00
if err != nil {
2023-02-04 02:11:48 +03:00
log . Error ( "Unable to get scheduled User[%d]: %v" , scheduledPRM . DoerID , err )
2022-05-07 20:05:52 +03:00
return
}
2022-05-11 13:09:36 +03:00
perm , err := access_model . GetUserRepoPermission ( ctx , pr . HeadRepo , doer )
2022-05-07 20:05:52 +03:00
if err != nil {
2023-02-04 02:11:48 +03:00
log . Error ( "GetUserRepoPermission %-v: %v" , pr . HeadRepo , err )
2022-05-07 20:05:52 +03:00
return
}
2024-05-08 19:11:43 +03:00
if err := pull_service . CheckPullMergeable ( ctx , doer , & perm , pr , pull_service . MergeCheckTypeGeneral , false ) ; err != nil {
2024-08-14 12:43:42 +03:00
if errors . Is ( err , pull_service . ErrUserNotAllowedToMerge ) {
2023-02-04 02:11:48 +03:00
log . Info ( "%-v was scheduled to automerge by an unauthorized user" , pr )
2022-05-07 20:05:52 +03:00
return
}
2024-05-08 19:11:43 +03:00
log . Error ( "%-v CheckPullMergeable: %v" , pr , err )
2022-05-07 20:05:52 +03:00
return
}
2022-11-03 18:49:00 +03:00
if err := pull_service . Merge ( ctx , pr , doer , baseGitRepo , scheduledPRM . MergeStyle , "" , scheduledPRM . Message , true ) ; err != nil {
2022-05-07 20:05:52 +03:00
log . Error ( "pull_service.Merge: %v" , err )
2024-05-21 18:23:22 +03:00
// FIXME: if merge failed, we should display some error message to the pull request page.
// The resolution is add a new column on automerge table named `error_message` to store the error message and displayed
// on the pull request page. But this should not be finished in a bug fix PR which will be backport to release branch.
2022-05-07 20:05:52 +03:00
return
}
2024-10-21 22:21:50 +03:00
if scheduledPRM . DeleteBranchAfterMerge {
err := repo_service . DeleteBranchAfterMerge ( ctx , doer , pr , headGitRepo )
if err != nil {
log . Error ( "%d repo_service.DeleteBranchIfUnused: %v" , pr . ID , err )
}
}
2022-05-07 20:05:52 +03:00
}