mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-12-13 16:55:37 +03:00
684b7a999f
* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
616 lines
15 KiB
Go
616 lines
15 KiB
Go
package archiver
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// Tar provides facilities for operating TAR archives.
|
|
// See http://www.gnu.org/software/tar/manual/html_node/Standard.html.
|
|
type Tar struct {
|
|
// Whether to overwrite existing files; if false,
|
|
// an error is returned if the file exists.
|
|
OverwriteExisting bool
|
|
|
|
// Whether to make all the directories necessary
|
|
// to create a tar archive in the desired path.
|
|
MkdirAll bool
|
|
|
|
// A single top-level folder can be implicitly
|
|
// created by the Archive or Unarchive methods
|
|
// if the files to be added to the archive
|
|
// or the files to be extracted from the archive
|
|
// do not all have a common root. This roughly
|
|
// mimics the behavior of archival tools integrated
|
|
// into OS file browsers which create a subfolder
|
|
// to avoid unexpectedly littering the destination
|
|
// folder with potentially many files, causing a
|
|
// problematic cleanup/organization situation.
|
|
// This feature is available for both creation
|
|
// and extraction of archives, but may be slightly
|
|
// inefficient with lots and lots of files,
|
|
// especially on extraction.
|
|
ImplicitTopLevelFolder bool
|
|
|
|
// If true, errors encountered during reading
|
|
// or writing a single file will be logged and
|
|
// the operation will continue on remaining files.
|
|
ContinueOnError bool
|
|
|
|
tw *tar.Writer
|
|
tr *tar.Reader
|
|
|
|
readerWrapFn func(io.Reader) (io.Reader, error)
|
|
writerWrapFn func(io.Writer) (io.Writer, error)
|
|
cleanupWrapFn func()
|
|
}
|
|
|
|
// CheckExt ensures the file extension matches the format.
|
|
func (*Tar) CheckExt(filename string) error {
|
|
if !strings.HasSuffix(filename, ".tar") {
|
|
return fmt.Errorf("filename must have a .tar extension")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Archive creates a tarball file at destination containing
|
|
// the files listed in sources. The destination must end with
|
|
// ".tar". File paths can be those of regular files or
|
|
// directories; directories will be recursively added.
|
|
func (t *Tar) Archive(sources []string, destination string) error {
|
|
err := t.CheckExt(destination)
|
|
if t.writerWrapFn == nil && err != nil {
|
|
return fmt.Errorf("checking extension: %v", err)
|
|
}
|
|
if !t.OverwriteExisting && fileExists(destination) {
|
|
return fmt.Errorf("file already exists: %s", destination)
|
|
}
|
|
|
|
// make the folder to contain the resulting archive
|
|
// if it does not already exist
|
|
destDir := filepath.Dir(destination)
|
|
if t.MkdirAll && !fileExists(destDir) {
|
|
err := mkdir(destDir, 0755)
|
|
if err != nil {
|
|
return fmt.Errorf("making folder for destination: %v", err)
|
|
}
|
|
}
|
|
|
|
out, err := os.Create(destination)
|
|
if err != nil {
|
|
return fmt.Errorf("creating %s: %v", destination, err)
|
|
}
|
|
defer out.Close()
|
|
|
|
err = t.Create(out)
|
|
if err != nil {
|
|
return fmt.Errorf("creating tar: %v", err)
|
|
}
|
|
defer t.Close()
|
|
|
|
var topLevelFolder string
|
|
if t.ImplicitTopLevelFolder && multipleTopLevels(sources) {
|
|
topLevelFolder = folderNameFromFileName(destination)
|
|
}
|
|
|
|
for _, source := range sources {
|
|
err := t.writeWalk(source, topLevelFolder, destination)
|
|
if err != nil {
|
|
return fmt.Errorf("walking %s: %v", source, err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Unarchive unpacks the .tar file at source to destination.
|
|
// Destination will be treated as a folder name.
|
|
func (t *Tar) Unarchive(source, destination string) error {
|
|
if !fileExists(destination) && t.MkdirAll {
|
|
err := mkdir(destination, 0755)
|
|
if err != nil {
|
|
return fmt.Errorf("preparing destination: %v", err)
|
|
}
|
|
}
|
|
|
|
// if the files in the archive do not all share a common
|
|
// root, then make sure we extract to a single subfolder
|
|
// rather than potentially littering the destination...
|
|
if t.ImplicitTopLevelFolder {
|
|
var err error
|
|
destination, err = t.addTopLevelFolder(source, destination)
|
|
if err != nil {
|
|
return fmt.Errorf("scanning source archive: %v", err)
|
|
}
|
|
}
|
|
|
|
file, err := os.Open(source)
|
|
if err != nil {
|
|
return fmt.Errorf("opening source archive: %v", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
err = t.Open(file, 0)
|
|
if err != nil {
|
|
return fmt.Errorf("opening tar archive for reading: %v", err)
|
|
}
|
|
defer t.Close()
|
|
|
|
for {
|
|
err := t.untarNext(destination)
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
if t.ContinueOnError {
|
|
log.Printf("[ERROR] Reading file in tar archive: %v", err)
|
|
continue
|
|
}
|
|
return fmt.Errorf("reading file in tar archive: %v", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// addTopLevelFolder scans the files contained inside
|
|
// the tarball named sourceArchive and returns a modified
|
|
// destination if all the files do not share the same
|
|
// top-level folder.
|
|
func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) {
|
|
file, err := os.Open(sourceArchive)
|
|
if err != nil {
|
|
return "", fmt.Errorf("opening source archive: %v", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
// if the reader is to be wrapped, ensure we do that now
|
|
// or we will not be able to read the archive successfully
|
|
reader := io.Reader(file)
|
|
if t.readerWrapFn != nil {
|
|
reader, err = t.readerWrapFn(reader)
|
|
if err != nil {
|
|
return "", fmt.Errorf("wrapping reader: %v", err)
|
|
}
|
|
}
|
|
if t.cleanupWrapFn != nil {
|
|
defer t.cleanupWrapFn()
|
|
}
|
|
|
|
tr := tar.NewReader(reader)
|
|
|
|
var files []string
|
|
for {
|
|
hdr, err := tr.Next()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return "", fmt.Errorf("scanning tarball's file listing: %v", err)
|
|
}
|
|
files = append(files, hdr.Name)
|
|
}
|
|
|
|
if multipleTopLevels(files) {
|
|
destination = filepath.Join(destination, folderNameFromFileName(sourceArchive))
|
|
}
|
|
|
|
return destination, nil
|
|
}
|
|
|
|
func (t *Tar) untarNext(to string) error {
|
|
f, err := t.Read()
|
|
if err != nil {
|
|
return err // don't wrap error; calling loop must break on io.EOF
|
|
}
|
|
header, ok := f.Header.(*tar.Header)
|
|
if !ok {
|
|
return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
|
|
}
|
|
return t.untarFile(f, filepath.Join(to, header.Name))
|
|
}
|
|
|
|
func (t *Tar) untarFile(f File, to string) error {
|
|
// do not overwrite existing files, if configured
|
|
if !f.IsDir() && !t.OverwriteExisting && fileExists(to) {
|
|
return fmt.Errorf("file already exists: %s", to)
|
|
}
|
|
|
|
hdr, ok := f.Header.(*tar.Header)
|
|
if !ok {
|
|
return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
|
|
}
|
|
|
|
switch hdr.Typeflag {
|
|
case tar.TypeDir:
|
|
return mkdir(to, f.Mode())
|
|
case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse:
|
|
return writeNewFile(to, f, f.Mode())
|
|
case tar.TypeSymlink:
|
|
return writeNewSymbolicLink(to, hdr.Linkname)
|
|
case tar.TypeLink:
|
|
return writeNewHardLink(to, filepath.Join(to, hdr.Linkname))
|
|
case tar.TypeXGlobalHeader:
|
|
return nil // ignore the pax global header from git-generated tarballs
|
|
default:
|
|
return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag)
|
|
}
|
|
}
|
|
|
|
func (t *Tar) writeWalk(source, topLevelFolder, destination string) error {
|
|
sourceInfo, err := os.Stat(source)
|
|
if err != nil {
|
|
return fmt.Errorf("%s: stat: %v", source, err)
|
|
}
|
|
destAbs, err := filepath.Abs(destination)
|
|
if err != nil {
|
|
return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err)
|
|
}
|
|
|
|
return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error {
|
|
handleErr := func(err error) error {
|
|
if t.ContinueOnError {
|
|
log.Printf("[ERROR] Walking %s: %v", fpath, err)
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
if err != nil {
|
|
return handleErr(fmt.Errorf("traversing %s: %v", fpath, err))
|
|
}
|
|
if info == nil {
|
|
return handleErr(fmt.Errorf("no file info"))
|
|
}
|
|
|
|
// make sure we do not copy our output file into itself
|
|
fpathAbs, err := filepath.Abs(fpath)
|
|
if err != nil {
|
|
return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err))
|
|
}
|
|
if within(fpathAbs, destAbs) {
|
|
return nil
|
|
}
|
|
|
|
// build the name to be used within the archive
|
|
nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath)
|
|
if err != nil {
|
|
return handleErr(err)
|
|
}
|
|
|
|
var file io.ReadCloser
|
|
if info.Mode().IsRegular() {
|
|
file, err = os.Open(fpath)
|
|
if err != nil {
|
|
return handleErr(fmt.Errorf("%s: opening: %v", fpath, err))
|
|
}
|
|
defer file.Close()
|
|
}
|
|
err = t.Write(File{
|
|
FileInfo: FileInfo{
|
|
FileInfo: info,
|
|
CustomName: nameInArchive,
|
|
},
|
|
ReadCloser: file,
|
|
})
|
|
if err != nil {
|
|
return handleErr(fmt.Errorf("%s: writing: %s", fpath, err))
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Create opens t for writing a tar archive to out.
|
|
func (t *Tar) Create(out io.Writer) error {
|
|
if t.tw != nil {
|
|
return fmt.Errorf("tar archive is already created for writing")
|
|
}
|
|
|
|
// wrapping writers allows us to output
|
|
// compressed tarballs, for example
|
|
if t.writerWrapFn != nil {
|
|
var err error
|
|
out, err = t.writerWrapFn(out)
|
|
if err != nil {
|
|
return fmt.Errorf("wrapping writer: %v", err)
|
|
}
|
|
}
|
|
|
|
t.tw = tar.NewWriter(out)
|
|
return nil
|
|
}
|
|
|
|
// Write writes f to t, which must have been opened for writing first.
|
|
func (t *Tar) Write(f File) error {
|
|
if t.tw == nil {
|
|
return fmt.Errorf("tar archive was not created for writing first")
|
|
}
|
|
if f.FileInfo == nil {
|
|
return fmt.Errorf("no file info")
|
|
}
|
|
if f.FileInfo.Name() == "" {
|
|
return fmt.Errorf("missing file name")
|
|
}
|
|
|
|
var linkTarget string
|
|
if isSymlink(f) {
|
|
var err error
|
|
linkTarget, err = os.Readlink(f.Name())
|
|
if err != nil {
|
|
return fmt.Errorf("%s: readlink: %v", f.Name(), err)
|
|
}
|
|
}
|
|
|
|
hdr, err := tar.FileInfoHeader(f, filepath.ToSlash(linkTarget))
|
|
if err != nil {
|
|
return fmt.Errorf("%s: making header: %v", f.Name(), err)
|
|
}
|
|
|
|
err = t.tw.WriteHeader(hdr)
|
|
if err != nil {
|
|
return fmt.Errorf("%s: writing header: %v", hdr.Name, err)
|
|
}
|
|
|
|
if f.IsDir() {
|
|
return nil // directories have no contents
|
|
}
|
|
|
|
if hdr.Typeflag == tar.TypeReg {
|
|
if f.ReadCloser == nil {
|
|
return fmt.Errorf("%s: no way to read file contents", f.Name())
|
|
}
|
|
_, err := io.Copy(t.tw, f)
|
|
if err != nil {
|
|
return fmt.Errorf("%s: copying contents: %v", f.Name(), err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Open opens t for reading an archive from
|
|
// in. The size parameter is not used.
|
|
func (t *Tar) Open(in io.Reader, size int64) error {
|
|
if t.tr != nil {
|
|
return fmt.Errorf("tar archive is already open for reading")
|
|
}
|
|
// wrapping readers allows us to open compressed tarballs
|
|
if t.readerWrapFn != nil {
|
|
var err error
|
|
in, err = t.readerWrapFn(in)
|
|
if err != nil {
|
|
return fmt.Errorf("wrapping file reader: %v", err)
|
|
}
|
|
}
|
|
t.tr = tar.NewReader(in)
|
|
return nil
|
|
}
|
|
|
|
// Read reads the next file from t, which must have
|
|
// already been opened for reading. If there are no
|
|
// more files, the error is io.EOF. The File must
|
|
// be closed when finished reading from it.
|
|
func (t *Tar) Read() (File, error) {
|
|
if t.tr == nil {
|
|
return File{}, fmt.Errorf("tar archive is not open")
|
|
}
|
|
|
|
hdr, err := t.tr.Next()
|
|
if err != nil {
|
|
return File{}, err // don't wrap error; preserve io.EOF
|
|
}
|
|
|
|
file := File{
|
|
FileInfo: hdr.FileInfo(),
|
|
Header: hdr,
|
|
ReadCloser: ReadFakeCloser{t.tr},
|
|
}
|
|
|
|
return file, nil
|
|
}
|
|
|
|
// Close closes the tar archive(s) opened by Create and Open.
|
|
func (t *Tar) Close() error {
|
|
var err error
|
|
if t.tr != nil {
|
|
t.tr = nil
|
|
}
|
|
if t.tw != nil {
|
|
tw := t.tw
|
|
t.tw = nil
|
|
err = tw.Close()
|
|
}
|
|
// make sure cleanup of "Reader/Writer wrapper"
|
|
// (say that ten times fast) happens AFTER the
|
|
// underlying stream is closed
|
|
if t.cleanupWrapFn != nil {
|
|
t.cleanupWrapFn()
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Walk calls walkFn for each visited item in archive.
|
|
func (t *Tar) Walk(archive string, walkFn WalkFunc) error {
|
|
file, err := os.Open(archive)
|
|
if err != nil {
|
|
return fmt.Errorf("opening archive file: %v", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
err = t.Open(file, 0)
|
|
if err != nil {
|
|
return fmt.Errorf("opening archive: %v", err)
|
|
}
|
|
defer t.Close()
|
|
|
|
for {
|
|
f, err := t.Read()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
if t.ContinueOnError {
|
|
log.Printf("[ERROR] Opening next file: %v", err)
|
|
continue
|
|
}
|
|
return fmt.Errorf("opening next file: %v", err)
|
|
}
|
|
err = walkFn(f)
|
|
if err != nil {
|
|
if err == ErrStopWalk {
|
|
break
|
|
}
|
|
if t.ContinueOnError {
|
|
log.Printf("[ERROR] Walking %s: %v", f.Name(), err)
|
|
continue
|
|
}
|
|
return fmt.Errorf("walking %s: %v", f.Name(), err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Extract extracts a single file from the tar archive.
|
|
// If the target is a directory, the entire folder will
|
|
// be extracted into destination.
|
|
func (t *Tar) Extract(source, target, destination string) error {
|
|
// target refers to a path inside the archive, which should be clean also
|
|
target = path.Clean(target)
|
|
|
|
// if the target ends up being a directory, then
|
|
// we will continue walking and extracting files
|
|
// until we are no longer within that directory
|
|
var targetDirPath string
|
|
|
|
return t.Walk(source, func(f File) error {
|
|
th, ok := f.Header.(*tar.Header)
|
|
if !ok {
|
|
return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
|
|
}
|
|
|
|
// importantly, cleaning the path strips tailing slash,
|
|
// which must be appended to folders within the archive
|
|
name := path.Clean(th.Name)
|
|
if f.IsDir() && target == name {
|
|
targetDirPath = path.Dir(name)
|
|
}
|
|
|
|
if within(target, th.Name) {
|
|
// either this is the exact file we want, or is
|
|
// in the directory we want to extract
|
|
|
|
// build the filename we will extract to
|
|
end, err := filepath.Rel(targetDirPath, th.Name)
|
|
if err != nil {
|
|
return fmt.Errorf("relativizing paths: %v", err)
|
|
}
|
|
joined := filepath.Join(destination, end)
|
|
|
|
err = t.untarFile(f, joined)
|
|
if err != nil {
|
|
return fmt.Errorf("extracting file %s: %v", th.Name, err)
|
|
}
|
|
|
|
// if our target was not a directory, stop walk
|
|
if targetDirPath == "" {
|
|
return ErrStopWalk
|
|
}
|
|
} else if targetDirPath != "" {
|
|
// finished walking the entire directory
|
|
return ErrStopWalk
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Match returns true if the format of file matches this
|
|
// type's format. It should not affect reader position.
|
|
func (*Tar) Match(file io.ReadSeeker) (bool, error) {
|
|
currentPos, err := file.Seek(0, io.SeekCurrent)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
_, err = file.Seek(0, 0)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
defer file.Seek(currentPos, io.SeekStart)
|
|
|
|
buf := make([]byte, tarBlockSize)
|
|
if _, err = io.ReadFull(file, buf); err != nil {
|
|
return false, nil
|
|
}
|
|
return hasTarHeader(buf), nil
|
|
}
|
|
|
|
// hasTarHeader checks passed bytes has a valid tar header or not. buf must
|
|
// contain at least 512 bytes and if not, it always returns false.
|
|
func hasTarHeader(buf []byte) bool {
|
|
if len(buf) < tarBlockSize {
|
|
return false
|
|
}
|
|
|
|
b := buf[148:156]
|
|
b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes
|
|
if len(b) == 0 {
|
|
return false // unknown format
|
|
}
|
|
hdrSum, err := strconv.ParseUint(string(b), 8, 64)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
// According to the go official archive/tar, Sun tar uses signed byte
|
|
// values so this calcs both signed and unsigned
|
|
var usum uint64
|
|
var sum int64
|
|
for i, c := range buf {
|
|
if 148 <= i && i < 156 {
|
|
c = ' ' // checksum field itself is counted as branks
|
|
}
|
|
usum += uint64(uint8(c))
|
|
sum += int64(int8(c))
|
|
}
|
|
|
|
if hdrSum != usum && int64(hdrSum) != sum {
|
|
return false // invalid checksum
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (t *Tar) String() string { return "tar" }
|
|
|
|
// NewTar returns a new, default instance ready to be customized and used.
|
|
func NewTar() *Tar {
|
|
return &Tar{
|
|
MkdirAll: true,
|
|
}
|
|
}
|
|
|
|
const tarBlockSize = 512
|
|
|
|
// Compile-time checks to ensure type implements desired interfaces.
|
|
var (
|
|
_ = Reader(new(Tar))
|
|
_ = Writer(new(Tar))
|
|
_ = Archiver(new(Tar))
|
|
_ = Unarchiver(new(Tar))
|
|
_ = Walker(new(Tar))
|
|
_ = Extractor(new(Tar))
|
|
_ = Matcher(new(Tar))
|
|
_ = ExtensionChecker(new(Tar))
|
|
)
|
|
|
|
// DefaultTar is a default instance that is conveniently ready to use.
|
|
var DefaultTar = NewTar()
|