// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package mirror
import (
"context"
"fmt"
"strings"
"time"
2022-05-20 17:08:52 +03:00
"code.gitea.io/gitea/models/db"
2021-12-10 04:27:50 +03:00
repo_model "code.gitea.io/gitea/models/repo"
2022-10-17 02:29:26 +03:00
system_model "code.gitea.io/gitea/models/system"
2021-06-14 20:20:43 +03:00
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification"
2021-11-30 23:06:32 +03:00
"code.gitea.io/gitea/modules/process"
2023-02-11 03:39:50 +03:00
"code.gitea.io/gitea/modules/proxy"
2021-06-14 20:20:43 +03:00
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
// gitShortEmptySha is the seven-character all-zero abbreviated commit ID.
// parseRemoteUpdateOutput stores it in a mirrorSyncResult as oldCommitID for a
// newly created reference and as newCommitID for a deleted reference.
const gitShortEmptySha = "0000000"
// UpdateAddress writes new address to Git repository and database
2022-01-20 02:26:57 +03:00
func UpdateAddress ( ctx context . Context , m * repo_model . Mirror , addr string ) error {
2021-06-14 20:20:43 +03:00
remoteName := m . GetRemoteName ( )
2022-06-02 19:56:32 +03:00
repoPath := m . GetRepository ( ) . RepoPath ( )
2021-06-14 20:20:43 +03:00
// Remove old remote
2022-10-23 17:44:45 +03:00
_ , _ , err := git . NewCommand ( ctx , "remote" , "rm" ) . AddDynamicArguments ( remoteName ) . RunStdString ( & git . RunOpts { Dir : repoPath } )
2021-06-14 20:20:43 +03:00
if err != nil && ! strings . HasPrefix ( err . Error ( ) , "exit status 128 - fatal: No such remote " ) {
return err
}
2022-10-23 17:44:45 +03:00
cmd := git . NewCommand ( ctx , "remote" , "add" ) . AddDynamicArguments ( remoteName ) . AddArguments ( "--mirror=fetch" ) . AddDynamicArguments ( addr )
2022-03-27 14:54:09 +03:00
if strings . Contains ( addr , "://" ) && strings . Contains ( addr , "@" ) {
2022-03-31 05:25:40 +03:00
cmd . SetDescription ( fmt . Sprintf ( "remote add %s --mirror=fetch %s [repo_path: %s]" , remoteName , util . SanitizeCredentialURLs ( addr ) , repoPath ) )
2022-03-27 14:54:09 +03:00
} else {
cmd . SetDescription ( fmt . Sprintf ( "remote add %s --mirror=fetch %s [repo_path: %s]" , remoteName , addr , repoPath ) )
}
2022-04-01 05:55:30 +03:00
_ , _ , err = cmd . RunStdString ( & git . RunOpts { Dir : repoPath } )
2021-06-14 20:20:43 +03:00
if err != nil && ! strings . HasPrefix ( err . Error ( ) , "exit status 128 - fatal: No such remote " ) {
return err
}
if m . Repo . HasWiki ( ) {
wikiPath := m . Repo . WikiPath ( )
2022-01-20 02:26:57 +03:00
wikiRemotePath := repo_module . WikiRemoteURL ( ctx , addr )
2021-06-14 20:20:43 +03:00
// Remove old remote of wiki
2022-10-23 17:44:45 +03:00
_ , _ , err = git . NewCommand ( ctx , "remote" , "rm" ) . AddDynamicArguments ( remoteName ) . RunStdString ( & git . RunOpts { Dir : wikiPath } )
2021-06-14 20:20:43 +03:00
if err != nil && ! strings . HasPrefix ( err . Error ( ) , "exit status 128 - fatal: No such remote " ) {
return err
}
2022-10-23 17:44:45 +03:00
cmd = git . NewCommand ( ctx , "remote" , "add" ) . AddDynamicArguments ( remoteName ) . AddArguments ( "--mirror=fetch" ) . AddDynamicArguments ( wikiRemotePath )
2022-03-27 14:54:09 +03:00
if strings . Contains ( wikiRemotePath , "://" ) && strings . Contains ( wikiRemotePath , "@" ) {
2022-03-31 05:25:40 +03:00
cmd . SetDescription ( fmt . Sprintf ( "remote add %s --mirror=fetch %s [repo_path: %s]" , remoteName , util . SanitizeCredentialURLs ( wikiRemotePath ) , wikiPath ) )
2022-03-27 14:54:09 +03:00
} else {
cmd . SetDescription ( fmt . Sprintf ( "remote add %s --mirror=fetch %s [repo_path: %s]" , remoteName , wikiRemotePath , wikiPath ) )
}
2022-04-01 05:55:30 +03:00
_ , _ , err = cmd . RunStdString ( & git . RunOpts { Dir : wikiPath } )
2021-06-14 20:20:43 +03:00
if err != nil && ! strings . HasPrefix ( err . Error ( ) , "exit status 128 - fatal: No such remote " ) {
return err
}
}
m . Repo . OriginalURL = addr
2022-05-20 17:08:52 +03:00
return repo_model . UpdateRepositoryCols ( ctx , m . Repo , "original_url" )
2021-06-14 20:20:43 +03:00
}
// mirrorSyncResult describes a single reference that changed during a mirror sync.
//
// A newly created reference has oldCommitID set to "0000000" and an empty
// newCommitID. A deleted reference has newCommitID set to "0000000" and an
// empty oldCommitID.
type mirrorSyncResult struct {
	// refName is the name of the reference that changed.
	refName string
	// oldCommitID and newCommitID are the commit IDs before and after the change.
	oldCommitID, newCommitID string
}
// parseRemoteUpdateOutput detects create, update and delete operations of references from upstream.
//
// output is the text printed by `git remote update`. Only lines containing
// "-> " are considered; the text after that arrow is the reference name.
// Each line starts with a three-character flag column:
//
//	" * " new reference   (oldCommitID is gitShortEmptySha)
//	" - " deleted         (newCommitID is gitShortEmptySha)
//	" + " forced update   (summary is "old...new")
//	"   " fast-forward    (summary is "old..new")
//
// Lines with any other flag are reported with a warning and skipped, and
// update lines whose SHA range cannot be parsed are logged and skipped.
func parseRemoteUpdateOutput(output string) []*mirrorSyncResult {
	results := make([]*mirrorSyncResult, 0, 3)
	for _, line := range strings.Split(output, "\n") {
		// Make sure reference name is presented before continue
		idx := strings.Index(line, "-> ")
		if idx == -1 {
			continue
		}
		refName := line[idx+3:]

		switch {
		case strings.HasPrefix(line, " * "): // New reference
			if strings.HasPrefix(line, " * [new tag]") {
				refName = git.TagPrefix + refName
			} else if strings.HasPrefix(line, " * [new branch]") {
				refName = git.BranchPrefix + refName
			}
			results = append(results, &mirrorSyncResult{
				refName:     refName,
				oldCommitID: gitShortEmptySha,
			})
		case strings.HasPrefix(line, " - "): // Delete reference
			results = append(results, &mirrorSyncResult{
				refName:     refName,
				newCommitID: gitShortEmptySha,
			})
		case strings.HasPrefix(line, " + "): // Force update
			// Drop the trailing reason, e.g. "(forced update)".
			if reasonIdx := strings.Index(refName, " "); reasonIdx > -1 {
				refName = refName[:reasonIdx]
			}
			oldID, newID, ok := splitSHARange(line, "...")
			if !ok {
				continue
			}
			results = append(results, &mirrorSyncResult{
				refName:     refName,
				oldCommitID: oldID,
				newCommitID: newID,
			})
		// A fast-forward has a blank flag, so the whole three-character flag
		// column is spaces. Matching a shorter prefix would also swallow
		// other flags such as " ! " (rejected) or " t " (tag update).
		case strings.HasPrefix(line, "   "): // New commits of a reference
			oldID, newID, ok := splitSHARange(line, "..")
			if !ok {
				continue
			}
			results = append(results, &mirrorSyncResult{
				refName:     refName,
				oldCommitID: oldID,
				newCommitID: newID,
			})
		default:
			log.Warn("parseRemoteUpdateOutput: unexpected update line %q", line)
		}
	}
	return results
}

// splitSHARange extracts the "old<sep>new" summary that follows the
// three-character flag column of an update line. ok is false, and the problem
// is logged, when the summary is missing or does not consist of two parts.
func splitSHARange(line, sep string) (oldID, newID string, ok bool) {
	delimIdx := strings.Index(line[3:], " ")
	if delimIdx == -1 {
		log.Error("SHA delimiter not found: %q", line)
		return "", "", false
	}
	shas := strings.Split(line[3:delimIdx+3], sep)
	if len(shas) != 2 {
		log.Error("Expect two SHAs but not what found: %q", line)
		return "", "", false
	}
	return shas[0], shas[1], true
}
2021-10-21 11:45:25 +03:00
func pruneBrokenReferences ( ctx context . Context ,
2021-12-10 04:27:50 +03:00
m * repo_model . Mirror ,
2021-10-21 11:45:25 +03:00
repoPath string ,
timeout time . Duration ,
stdoutBuilder , stderrBuilder * strings . Builder ,
2022-01-20 20:46:10 +03:00
isWiki bool ,
) error {
2021-10-21 11:45:25 +03:00
wiki := ""
if isWiki {
wiki = "Wiki "
}
stderrBuilder . Reset ( )
stdoutBuilder . Reset ( )
2022-10-23 17:44:45 +03:00
pruneErr := git . NewCommand ( ctx , "remote" , "prune" ) . AddDynamicArguments ( m . GetRemoteName ( ) ) .
2021-10-21 11:45:25 +03:00
SetDescription ( fmt . Sprintf ( "Mirror.runSync %ssPrune references: %s " , wiki , m . Repo . FullName ( ) ) ) .
2022-04-01 05:55:30 +03:00
Run ( & git . RunOpts {
2022-02-11 15:47:22 +03:00
Timeout : timeout ,
Dir : repoPath ,
Stdout : stdoutBuilder ,
Stderr : stderrBuilder ,
} )
2021-10-21 11:45:25 +03:00
if pruneErr != nil {
stdout := stdoutBuilder . String ( )
stderr := stderrBuilder . String ( )
// sanitize the output, since it may contain the remote address, which may
// contain a password
2022-03-31 05:25:40 +03:00
stderrMessage := util . SanitizeCredentialURLs ( stderr )
stdoutMessage := util . SanitizeCredentialURLs ( stdout )
2021-10-21 11:45:25 +03:00
log . Error ( "Failed to prune mirror repository %s%-v references:\nStdout: %s\nStderr: %s\nErr: %v" , wiki , m . Repo , stdoutMessage , stderrMessage , pruneErr )
desc := fmt . Sprintf ( "Failed to prune mirror repository %s'%s' references: %s" , wiki , repoPath , stderrMessage )
2022-10-17 02:29:26 +03:00
if err := system_model . CreateRepositoryNotice ( desc ) ; err != nil {
2021-10-21 11:45:25 +03:00
log . Error ( "CreateRepositoryNotice: %v" , err )
}
// this if will only be reached on a successful prune so try to get the mirror again
}
return pruneErr
}
2021-06-14 20:20:43 +03:00
// runSync returns true if sync finished without error.
2021-12-10 04:27:50 +03:00
func runSync ( ctx context . Context , m * repo_model . Mirror ) ( [ ] * mirrorSyncResult , bool ) {
2021-06-14 20:20:43 +03:00
repoPath := m . Repo . RepoPath ( )
wikiPath := m . Repo . WikiPath ( )
timeout := time . Duration ( setting . Git . Timeout . Mirror ) * time . Second
log . Trace ( "SyncMirrors [repo: %-v]: running git remote update..." , m . Repo )
2022-03-29 20:12:33 +03:00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
cmd := git . NewCommand ( ctx , "remote" , "update" )
2021-06-14 20:20:43 +03:00
if m . EnablePrune {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
cmd . AddArguments ( "--prune" )
2021-06-14 20:20:43 +03:00
}
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
cmd . AddDynamicArguments ( m . GetRemoteName ( ) )
2021-06-14 20:20:43 +03:00
2022-06-11 16:50:14 +03:00
remoteURL , remoteErr := git . GetRemoteURL ( ctx , repoPath , m . GetRemoteName ( ) )
2021-06-14 20:20:43 +03:00
if remoteErr != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: GetRemoteAddress Error %v" , m . Repo , remoteErr )
2022-06-11 16:50:14 +03:00
return nil , false
2021-06-14 20:20:43 +03:00
}
2023-02-11 03:39:50 +03:00
envs := proxy . EnvWithProxy ( remoteURL . URL )
2021-06-14 20:20:43 +03:00
stdoutBuilder := strings . Builder { }
stderrBuilder := strings . Builder { }
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
if err := cmd .
2021-06-14 20:20:43 +03:00
SetDescription ( fmt . Sprintf ( "Mirror.runSync: %s" , m . Repo . FullName ( ) ) ) .
2022-04-01 05:55:30 +03:00
Run ( & git . RunOpts {
2022-02-11 15:47:22 +03:00
Timeout : timeout ,
Dir : repoPath ,
2023-02-11 03:39:50 +03:00
Env : envs ,
2022-02-11 15:47:22 +03:00
Stdout : & stdoutBuilder ,
Stderr : & stderrBuilder ,
} ) ; err != nil {
2021-06-14 20:20:43 +03:00
stdout := stdoutBuilder . String ( )
stderr := stderrBuilder . String ( )
2022-03-31 05:25:40 +03:00
// sanitize the output, since it may contain the remote address, which may contain a password
stderrMessage := util . SanitizeCredentialURLs ( stderr )
stdoutMessage := util . SanitizeCredentialURLs ( stdout )
2021-06-14 20:20:43 +03:00
2021-10-21 11:45:25 +03:00
// Now check if the error is a resolve reference due to broken reference
if strings . Contains ( stderr , "unable to resolve reference" ) && strings . Contains ( stderr , "reference broken" ) {
2022-03-10 13:09:48 +03:00
log . Warn ( "SyncMirrors [repo: %-v]: failed to update mirror repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune" , m . Repo , stdoutMessage , stderrMessage , err )
2021-10-21 11:45:25 +03:00
err = nil
// Attempt prune
2022-03-31 05:25:40 +03:00
pruneErr := pruneBrokenReferences ( ctx , m , repoPath , timeout , & stdoutBuilder , & stderrBuilder , false )
2021-10-21 11:45:25 +03:00
if pruneErr == nil {
// Successful prune - reattempt mirror
stderrBuilder . Reset ( )
stdoutBuilder . Reset ( )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 05:30:43 +03:00
if err = cmd .
2021-10-21 11:45:25 +03:00
SetDescription ( fmt . Sprintf ( "Mirror.runSync: %s" , m . Repo . FullName ( ) ) ) .
2022-04-01 05:55:30 +03:00
Run ( & git . RunOpts {
2022-02-11 15:47:22 +03:00
Timeout : timeout ,
Dir : repoPath ,
Stdout : & stdoutBuilder ,
Stderr : & stderrBuilder ,
} ) ; err != nil {
2021-10-21 11:45:25 +03:00
stdout := stdoutBuilder . String ( )
stderr := stderrBuilder . String ( )
// sanitize the output, since it may contain the remote address, which may
// contain a password
2022-03-31 05:25:40 +03:00
stderrMessage = util . SanitizeCredentialURLs ( stderr )
stdoutMessage = util . SanitizeCredentialURLs ( stdout )
2021-10-21 11:45:25 +03:00
}
}
}
// If there is still an error (or there always was an error)
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to update mirror repository:\nStdout: %s\nStderr: %s\nErr: %v" , m . Repo , stdoutMessage , stderrMessage , err )
2021-10-21 11:45:25 +03:00
desc := fmt . Sprintf ( "Failed to update mirror repository '%s': %s" , repoPath , stderrMessage )
2022-10-17 02:29:26 +03:00
if err = system_model . CreateRepositoryNotice ( desc ) ; err != nil {
2021-10-21 11:45:25 +03:00
log . Error ( "CreateRepositoryNotice: %v" , err )
}
return nil , false
2021-06-14 20:20:43 +03:00
}
}
output := stderrBuilder . String ( )
2022-03-29 20:12:33 +03:00
if err := git . WriteCommitGraph ( ctx , repoPath ) ; err != nil {
log . Error ( "SyncMirrors [repo: %-v]: %v" , m . Repo , err )
}
2022-03-29 22:13:41 +03:00
gitRepo , err := git . OpenRepository ( ctx , repoPath )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to OpenRepository: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return nil , false
}
log . Trace ( "SyncMirrors [repo: %-v]: syncing releases with tags..." , m . Repo )
if err = repo_module . SyncReleasesWithTags ( m . Repo , gitRepo ) ; err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to synchronize tags to releases: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
}
if m . LFS && setting . LFS . StartServer {
log . Trace ( "SyncMirrors [repo: %-v]: syncing LFS objects..." , m . Repo )
2022-06-11 16:50:14 +03:00
endpoint := lfs . DetermineEndpoint ( remoteURL . String ( ) , m . LFSEndpoint )
2021-11-20 12:34:05 +03:00
lfsClient := lfs . NewClient ( endpoint , nil )
if err = repo_module . StoreMissingLfsObjectsInRepository ( ctx , m . Repo , gitRepo , lfsClient ) ; err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to synchronize LFS objects for repository: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
}
}
gitRepo . Close ( )
log . Trace ( "SyncMirrors [repo: %-v]: updating size of repository" , m . Repo )
2022-06-06 11:01:49 +03:00
if err := repo_module . UpdateRepoSize ( ctx , m . Repo ) ; err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to update size for mirror repository: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
}
if m . Repo . HasWiki ( ) {
log . Trace ( "SyncMirrors [repo: %-v Wiki]: running git remote update..." , m . Repo )
stderrBuilder . Reset ( )
stdoutBuilder . Reset ( )
2022-10-23 17:44:45 +03:00
if err := git . NewCommand ( ctx , "remote" , "update" , "--prune" ) . AddDynamicArguments ( m . GetRemoteName ( ) ) .
2021-06-14 20:20:43 +03:00
SetDescription ( fmt . Sprintf ( "Mirror.runSync Wiki: %s " , m . Repo . FullName ( ) ) ) .
2022-04-01 05:55:30 +03:00
Run ( & git . RunOpts {
2022-02-11 15:47:22 +03:00
Timeout : timeout ,
Dir : wikiPath ,
Stdout : & stdoutBuilder ,
Stderr : & stderrBuilder ,
} ) ; err != nil {
2021-06-14 20:20:43 +03:00
stdout := stdoutBuilder . String ( )
stderr := stderrBuilder . String ( )
2022-03-31 05:25:40 +03:00
// sanitize the output, since it may contain the remote address, which may contain a password
stderrMessage := util . SanitizeCredentialURLs ( stderr )
stdoutMessage := util . SanitizeCredentialURLs ( stdout )
2021-06-14 20:20:43 +03:00
2021-10-21 11:45:25 +03:00
// Now check if the error is a resolve reference due to broken reference
if strings . Contains ( stderrMessage , "unable to resolve reference" ) && strings . Contains ( stderrMessage , "reference broken" ) {
2022-03-10 13:09:48 +03:00
log . Warn ( "SyncMirrors [repo: %-v Wiki]: failed to update mirror wiki repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune" , m . Repo , stdoutMessage , stderrMessage , err )
2021-10-21 11:45:25 +03:00
err = nil
// Attempt prune
2022-03-31 05:25:40 +03:00
pruneErr := pruneBrokenReferences ( ctx , m , repoPath , timeout , & stdoutBuilder , & stderrBuilder , true )
2021-10-21 11:45:25 +03:00
if pruneErr == nil {
// Successful prune - reattempt mirror
stderrBuilder . Reset ( )
stdoutBuilder . Reset ( )
2022-10-23 17:44:45 +03:00
if err = git . NewCommand ( ctx , "remote" , "update" , "--prune" ) . AddDynamicArguments ( m . GetRemoteName ( ) ) .
2021-10-21 11:45:25 +03:00
SetDescription ( fmt . Sprintf ( "Mirror.runSync Wiki: %s " , m . Repo . FullName ( ) ) ) .
2022-04-01 05:55:30 +03:00
Run ( & git . RunOpts {
2022-02-11 15:47:22 +03:00
Timeout : timeout ,
Dir : wikiPath ,
Stdout : & stdoutBuilder ,
Stderr : & stderrBuilder ,
} ) ; err != nil {
2021-10-21 11:45:25 +03:00
stdout := stdoutBuilder . String ( )
stderr := stderrBuilder . String ( )
2022-03-31 05:25:40 +03:00
stderrMessage = util . SanitizeCredentialURLs ( stderr )
stdoutMessage = util . SanitizeCredentialURLs ( stdout )
2021-10-21 11:45:25 +03:00
}
}
}
// If there is still an error (or there always was an error)
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v Wiki]: failed to update mirror repository wiki:\nStdout: %s\nStderr: %s\nErr: %v" , m . Repo , stdoutMessage , stderrMessage , err )
2021-10-21 11:45:25 +03:00
desc := fmt . Sprintf ( "Failed to update mirror repository wiki '%s': %s" , wikiPath , stderrMessage )
2022-10-17 02:29:26 +03:00
if err = system_model . CreateRepositoryNotice ( desc ) ; err != nil {
2021-10-21 11:45:25 +03:00
log . Error ( "CreateRepositoryNotice: %v" , err )
}
return nil , false
2021-06-14 20:20:43 +03:00
}
2022-03-29 20:12:33 +03:00
if err := git . WriteCommitGraph ( ctx , wikiPath ) ; err != nil {
log . Error ( "SyncMirrors [repo: %-v]: %v" , m . Repo , err )
}
2021-06-14 20:20:43 +03:00
}
log . Trace ( "SyncMirrors [repo: %-v Wiki]: git remote update complete" , m . Repo )
}
log . Trace ( "SyncMirrors [repo: %-v]: invalidating mirror branch caches..." , m . Repo )
2022-01-20 02:26:57 +03:00
branches , _ , err := git . GetBranchesByPath ( ctx , m . Repo . RepoPath ( ) , 0 , 0 )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to GetBranches: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return nil , false
}
for _ , branch := range branches {
cache . Remove ( m . Repo . GetCommitsCountCacheKey ( branch . Name , true ) )
}
m . UpdatedUnix = timeutil . TimeStampNow ( )
return parseRemoteUpdateOutput ( output ) , true
}
// SyncPullMirror starts the sync of the pull mirror and schedules the next run.
func SyncPullMirror ( ctx context . Context , repoID int64 ) bool {
log . Trace ( "SyncMirrors [repo_id: %v]" , repoID )
defer func ( ) {
err := recover ( )
if err == nil {
return
}
// There was a panic whilst syncMirrors...
2022-03-10 13:09:48 +03:00
log . Error ( "PANIC whilst SyncMirrors[repo_id: %d] Panic: %v\nStacktrace: %s" , repoID , err , log . Stack ( 2 ) )
2021-06-14 20:20:43 +03:00
} ( )
2022-05-20 17:08:52 +03:00
m , err := repo_model . GetMirrorByRepoID ( ctx , repoID )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo_id: %v]: unable to GetMirrorByRepoID: %v" , repoID , err )
2021-06-14 20:20:43 +03:00
return false
}
2022-05-20 17:08:52 +03:00
_ = m . GetRepository ( ) // force load repository of mirror
2021-06-14 20:20:43 +03:00
2021-11-30 23:06:32 +03:00
ctx , _ , finished := process . GetManager ( ) . AddContext ( ctx , fmt . Sprintf ( "Syncing Mirror %s/%s" , m . Repo . OwnerName , m . Repo . Name ) )
defer finished ( )
2021-06-14 20:20:43 +03:00
log . Trace ( "SyncMirrors [repo: %-v]: Running Sync" , m . Repo )
results , ok := runSync ( ctx , m )
if ! ok {
2022-03-27 17:40:17 +03:00
if err = repo_model . TouchMirror ( ctx , m ) ; err != nil {
log . Error ( "SyncMirrors [repo: %-v]: failed to TouchMirror: %v" , m . Repo , err )
}
2021-06-14 20:20:43 +03:00
return false
}
log . Trace ( "SyncMirrors [repo: %-v]: Scheduling next update" , m . Repo )
m . ScheduleNextUpdate ( )
2022-05-20 17:08:52 +03:00
if err = repo_model . UpdateMirror ( ctx , m ) ; err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: failed to UpdateMirror with next update date: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return false
}
var gitRepo * git . Repository
if len ( results ) == 0 {
log . Trace ( "SyncMirrors [repo: %-v]: no branches updated" , m . Repo )
} else {
log . Trace ( "SyncMirrors [repo: %-v]: %d branches updated" , m . Repo , len ( results ) )
2022-03-29 22:13:41 +03:00
gitRepo , err = git . OpenRepository ( ctx , m . Repo . RepoPath ( ) )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to OpenRepository: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return false
}
defer gitRepo . Close ( )
if ok := checkAndUpdateEmptyRepository ( m , gitRepo , results ) ; ! ok {
return false
}
}
for _ , result := range results {
// Discard GitHub pull requests, i.e. refs/pull/*
2021-12-02 10:28:08 +03:00
if strings . HasPrefix ( result . refName , git . PullPrefix ) {
2021-06-14 20:20:43 +03:00
continue
}
tp , _ := git . SplitRefName ( result . refName )
// Create reference
if result . oldCommitID == gitShortEmptySha {
if tp == git . TagPrefix {
tp = "tag"
} else if tp == git . BranchPrefix {
tp = "branch"
}
commitID , err := gitRepo . GetRefCommitID ( result . refName )
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to GetRefCommitID [ref_name: %s]: %v" , m . Repo , result . refName , err )
2021-06-14 20:20:43 +03:00
continue
}
2022-11-19 11:12:33 +03:00
notification . NotifySyncPushCommits ( ctx , m . Repo . MustOwner ( ctx ) , m . Repo , & repo_module . PushUpdateOptions {
2021-06-14 20:20:43 +03:00
RefFullName : result . refName ,
OldCommitID : git . EmptySHA ,
NewCommitID : commitID ,
} , repo_module . NewPushCommits ( ) )
2022-11-19 11:12:33 +03:00
notification . NotifySyncCreateRef ( ctx , m . Repo . MustOwner ( ctx ) , m . Repo , tp , result . refName , commitID )
2021-06-14 20:20:43 +03:00
continue
}
// Delete reference
if result . newCommitID == gitShortEmptySha {
2022-11-19 11:12:33 +03:00
notification . NotifySyncDeleteRef ( ctx , m . Repo . MustOwner ( ctx ) , m . Repo , tp , result . refName )
2021-06-14 20:20:43 +03:00
continue
}
// Push commits
2022-01-20 02:26:57 +03:00
oldCommitID , err := git . GetFullCommitID ( gitRepo . Ctx , gitRepo . Path , result . oldCommitID )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to get GetFullCommitID[%s]: %v" , m . Repo , result . oldCommitID , err )
2021-06-14 20:20:43 +03:00
continue
}
2022-01-20 02:26:57 +03:00
newCommitID , err := git . GetFullCommitID ( gitRepo . Ctx , gitRepo . Path , result . newCommitID )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to get GetFullCommitID [%s]: %v" , m . Repo , result . newCommitID , err )
2021-06-14 20:20:43 +03:00
continue
}
commits , err := gitRepo . CommitsBetweenIDs ( newCommitID , oldCommitID )
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to get CommitsBetweenIDs [new_commit_id: %s, old_commit_id: %s]: %v" , m . Repo , newCommitID , oldCommitID , err )
2021-06-14 20:20:43 +03:00
continue
}
2021-08-09 21:08:51 +03:00
theCommits := repo_module . GitToPushCommits ( commits )
2021-06-14 20:20:43 +03:00
if len ( theCommits . Commits ) > setting . UI . FeedMaxCommitNum {
theCommits . Commits = theCommits . Commits [ : setting . UI . FeedMaxCommitNum ]
}
2023-03-04 07:45:53 +03:00
if newCommit , err := gitRepo . GetCommit ( newCommitID ) ; err != nil {
log . Error ( "SyncMirrors [repo: %-v]: unable to get commit %s: %v" , m . Repo , newCommitID , err )
continue
} else {
theCommits . HeadCommit = repo_module . CommitToPushCommit ( newCommit )
}
2021-06-14 20:20:43 +03:00
theCommits . CompareURL = m . Repo . ComposeCompareURL ( oldCommitID , newCommitID )
2022-11-19 11:12:33 +03:00
notification . NotifySyncPushCommits ( ctx , m . Repo . MustOwner ( ctx ) , m . Repo , & repo_module . PushUpdateOptions {
2021-06-14 20:20:43 +03:00
RefFullName : result . refName ,
OldCommitID : oldCommitID ,
NewCommitID : newCommitID ,
} , theCommits )
}
log . Trace ( "SyncMirrors [repo: %-v]: done notifying updated branches/tags - now updating last commit time" , m . Repo )
// Get latest commit date and update to current repository updated time
2022-01-20 02:26:57 +03:00
commitDate , err := git . GetLatestCommitTime ( ctx , m . Repo . RepoPath ( ) )
2021-06-14 20:20:43 +03:00
if err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to GetLatestCommitDate: %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return false
}
2021-12-12 18:48:20 +03:00
if err = repo_model . UpdateRepositoryUpdatedTime ( m . RepoID , commitDate ) ; err != nil {
2022-03-10 13:09:48 +03:00
log . Error ( "SyncMirrors [repo: %-v]: unable to update repository 'updated_unix': %v" , m . Repo , err )
2021-06-14 20:20:43 +03:00
return false
}
log . Trace ( "SyncMirrors [repo: %-v]: Successfully updated" , m . Repo )
return true
}
2021-12-10 04:27:50 +03:00
func checkAndUpdateEmptyRepository ( m * repo_model . Mirror , gitRepo * git . Repository , results [ ] * mirrorSyncResult ) bool {
2021-06-14 20:20:43 +03:00
if ! m . Repo . IsEmpty {
return true
}
hasDefault := false
hasMaster := false
hasMain := false
defaultBranchName := m . Repo . DefaultBranch
if len ( defaultBranchName ) == 0 {
defaultBranchName = setting . Repository . DefaultBranch
}
firstName := ""
for _ , result := range results {
2021-12-02 10:28:08 +03:00
if strings . HasPrefix ( result . refName , git . PullPrefix ) {
2021-06-14 20:20:43 +03:00
continue
}
tp , name := git . SplitRefName ( result . refName )
if len ( tp ) > 0 && tp != git . BranchPrefix {
continue
}
if len ( firstName ) == 0 {
firstName = name
}
hasDefault = hasDefault || name == defaultBranchName
hasMaster = hasMaster || name == "master"
hasMain = hasMain || name == "main"
}
if len ( firstName ) > 0 {
if hasDefault {
m . Repo . DefaultBranch = defaultBranchName
} else if hasMaster {
m . Repo . DefaultBranch = "master"
} else if hasMain {
m . Repo . DefaultBranch = "main"
} else {
m . Repo . DefaultBranch = firstName
}
// Update the git repository default branch
if err := gitRepo . SetDefaultBranch ( m . Repo . DefaultBranch ) ; err != nil {
if ! git . IsErrUnsupportedVersion ( err ) {
log . Error ( "Failed to update default branch of underlying git repository %-v. Error: %v" , m . Repo , err )
desc := fmt . Sprintf ( "Failed to uupdate default branch of underlying git repository '%s': %v" , m . Repo . RepoPath ( ) , err )
2022-10-17 02:29:26 +03:00
if err = system_model . CreateRepositoryNotice ( desc ) ; err != nil {
2021-06-14 20:20:43 +03:00
log . Error ( "CreateRepositoryNotice: %v" , err )
}
return false
}
}
m . Repo . IsEmpty = false
// Update the is empty and default_branch columns
2022-05-20 17:08:52 +03:00
if err := repo_model . UpdateRepositoryCols ( db . DefaultContext , m . Repo , "default_branch" , "is_empty" ) ; err != nil {
2021-06-14 20:20:43 +03:00
log . Error ( "Failed to update default branch of repository %-v. Error: %v" , m . Repo , err )
desc := fmt . Sprintf ( "Failed to uupdate default branch of repository '%s': %v" , m . Repo . RepoPath ( ) , err )
2022-10-17 02:29:26 +03:00
if err = system_model . CreateRepositoryNotice ( desc ) ; err != nil {
2021-06-14 20:20:43 +03:00
log . Error ( "CreateRepositoryNotice: %v" , err )
}
return false
}
}
return true
}