diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 00000000..fe3dcf81 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,14 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| >= 4.x | :white_check_mark: | +| < 4.0 | :x: | + +## Reporting a Vulnerability + +Please send the details to: + +- Matthew Holt \ No newline at end of file diff --git a/.github/workflows/macos-latest.yml b/.github/workflows/macos-latest.yml index 519ef22e..e2668261 100644 --- a/.github/workflows/macos-latest.yml +++ b/.github/workflows/macos-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.13, 1.17] + go-version: [1.17] runs-on: macos-latest steps: - name: Install Go diff --git a/.github/workflows/ubuntu-latest.yml b/.github/workflows/ubuntu-latest.yml index 0ce2449f..948a8bdd 100644 --- a/.github/workflows/ubuntu-latest.yml +++ b/.github/workflows/ubuntu-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.13, 1.17] + go-version: [1.17] runs-on: ubuntu-latest steps: - name: Install Go diff --git a/.github/workflows/windows-latest.yml b/.github/workflows/windows-latest.yml index 93717281..810912f6 100644 --- a/.github/workflows/windows-latest.yml +++ b/.github/workflows/windows-latest.yml @@ -8,7 +8,7 @@ jobs: strategy: matrix: - go-version: [1.13, 1.17] + go-version: [1.17] runs-on: windows-latest steps: - name: Install Go diff --git a/.gitignore b/.gitignore index 4a87fc1a..58accbac 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1 @@ -/arc -/cmd/arc/arc -/dist/ -/vendor/ - -.DS_Store _gitignore -builds/ -*.test -.*.sw* diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index f9f5139c..00000000 --- a/.prettierrc +++ /dev/null @@ -1,4 +0,0 @@ -{ - "bracketSpacing": true, - "printWidth": 120, -} diff --git a/README.md b/README.md index c8de5e7e..28e1e110 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,32 @@ -# archiver [![archiver 
GoDoc](https://img.shields.io/badge/reference-godoc-blue.svg?style=flat-square)](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) [![Ubuntu-latest](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml) [![Macos-latest](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml) [![Windows-latest](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml) +# archiver [![Go Reference](https://pkg.go.dev/badge/github.com/mholt/archiver/v4.svg)](https://pkg.go.dev/github.com/mholt/archiver/v4) [![Ubuntu-latest](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml) [![Macos-latest](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml) [![Windows-latest](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml) -Introducing **Archiver 3.1** - a cross-platform, multi-format archive utility and Go library. A powerful and flexible library meets an elegant CLI in this generic replacement for several platform-specific or format-specific archive utilities. +Introducing **Archiver 4.0** - a cross-platform, multi-format archive utility and Go library. A powerful and flexible library meets an elegant CLI in this generic replacement for several platform-specific or format-specific archive utilities. -## Features - -Package archiver makes it trivially easy to make and extract common archive formats such as tarball (and its compressed variants) and zip. Simply name the input and output file(s). 
The `arc` command runs the same on all platforms and has no external dependencies (not even libc). It is powered by the Go standard library and several third-party, pure-Go libraries. - -Files are put into the root of the archive; directories are recursively added, preserving structure. - -- Make whole archives from a list of files -- Open whole archives to a folder -- Extract specific files/folders from archives -- Stream files in and out of archives without needing actual files on disk -- Traverse archive contents without loading them -- Compress files -- Decompress files -- Streaming compression and decompression -- Several archive and compression formats supported +**:warning: v4 is in ALPHA. The core library APIs work pretty well but the command has not been implemented yet, nor have most automated tests. If you need the `arc` command, stick with v3 for now.** -### Format-dependent features +## Features -- Gzip is multithreaded -- Optionally create a top-level folder to avoid littering a directory or archive root with files -- Toggle overwrite existing files -- Adjust compression level -- Zip: store (not compress) already-compressed files -- Make all necessary directories +- Stream-oriented APIs +- Automatically identify archive and compression formats: + - By file name + - By header +- Traverse directories, archive files, and any other file uniformly as [`io/fs`](https://pkg.go.dev/io/fs) file systems: + - [`DirFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#DirFS) + - [`FileFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#FileFS) + - [`ArchiveFS`](https://pkg.go.dev/github.com/mholt/archiver/v4#ArchiveFS) +- Compress and decompress files +- Create and extract archive files +- Walk or traverse into archive files +- Extract only specific files from archives +- Insert (append) into .tar files +- Numerous archive and compression formats supported +- Extensible (add more formats just by registering them) +- Cross-platform, static binary +- Pure Go (no 
cgo) +- Multithreaded Gzip +- Adjust compression levels +- Automatically add compressed files to zip archives without re-compressing - Open password-protected RAR archives -- Optionally continue with other files after an error ### Supported compression formats @@ -38,7 +37,7 @@ Files are put into the root of the archive; directories are recursively added, p - lz4 - snappy (sz) - xz -- zstandard (zstd) +- zstandard (zst) ### Supported archive formats @@ -46,279 +45,247 @@ Files are put into the root of the archive; directories are recursively added, p - .tar (including any compressed variants like .tar.gz) - .rar (read-only) -Tar files can optionally be compressed using any of the above compression formats. - -## GoDoc - -See - -## Install - -### With webi - -[`webi`](https://webinstall.dev/arc) will install `webi` and `arc` to `~/.local/bin/` and update your `PATH`. - -#### Mac, Linux, Raspberry Pi - -```bash -curl -fsS https://webinstall.dev/arc | bash -``` - -#### Windows 10 - -```pwsh -curl.exe -fsS -A MS https://webinstall.dev/arc | powershell -``` - -### With Go - -To install the runnable binary to your \$GOPATH/bin: - -```bash -go install github.com/mholt/archiver/v3/cmd/arc@latest -``` - -### Manually - -To install manually +Tar files can optionally be compressed using any compression format. -1. Download the binary for your platform from the [Github Releases](https://github.com/mholt/archiver/releases) page. -2. Move the binary to a location in your path, for example: - - without `sudo`: - ```bash - chmod a+x ~/Downloads/arc_* - mkdir -p ~/.local/bin - mv ~/Downloads/arc_* ~/.local/bin/arc - ``` - - as `root`: - ```bash - chmod a+x ~/Downloads/arc_* - sudo mkdir -p /usr/local/bin - sudo mv ~/Downloads/arc_* /usr/local/bin/arc - ``` -3. If needed, update `~/.bashrc` or `~/.profile` to include add `arc` in your `PATH`, for example: - ``` - echo 'PATH="$HOME:/.local/bin:$PATH"' >> ~/.bashrc - ``` +## Command use -## Build from Source +Coming soon for v4. 
See [the last v3 docs](https://github.com/mholt/archiver/tree/v3.5.1). -You can successfully build `arc` with just the go tooling, or with `goreleaser`. -### With `go` +## Library use ```bash -go build cmd/arc/*.go +$ go get github.com/mholt/archiver/v4 ``` -### Multi-platform with `goreleaser` -Builds with `goreleaser` will also include version info. +### Create archive -```bash -goreleaser --snapshot --skip-publish --rm-dist -``` - -## Command Use - -### Make new archive +Creating archives can be done entirely without needing a real disk or storage device since all you need is a list of [`File` structs](https://pkg.go.dev/github.com/mholt/archiver/v4#File) to pass in. -```bash -# Syntax: arc archive [archive name] [input files...] +However, creating archives from files on disk is very common, so you can use the `FilesFromDisk()` function to help you map filenames on disk to their paths in the archive. Then create and customize the format type. -arc archive test.tar.gz file1.txt images/file2.jpg folder/subfolder -``` +In this example, we add 2 files and a directory (which includes its contents recursively) to a .tar.gz file: -(At least one input file is required.) 
+```go +// map files on disk to their paths in the archive +files, err := archiver.FilesFromDisk(map[string]string{ + "/path/on/disk/file1.txt": "file1.txt", + "/path/on/disk/file2.txt": "subfolder/file2.txt", + "/path/on/disk/folder": "", +}) +if err != nil { + return err +} -### Extract entire archive +// create the output file we'll write to +out, err := os.Create("example.tar.gz") +if err != nil { + return err +} +defer out.Close() -```bash -# Syntax: arc unarchive [archive name] [destination] +// we can use the CompressedArchive type to gzip a tarball +// (compression is not required; you could use Tar directly) +format := archiver.CompressedArchive{ + Compression: archiver.Gz{}, + Archival: archiver.Tar{}, +} -arc unarchive test.tar.gz +// create the archive +err = format.Archive(context.Background(), out, files) +if err != nil { + return err +} ``` -(The destination path is optional; default is current directory.) - -The archive name must end with a supported file extension—this is how it knows what kind of archive to make. Run `arc help` for more help. +### Extract archive -### List archive contents +Extracting an archive, extracting _from_ an archive, and walking an archive are all the same function. -```bash -# Syntax: arc ls [archive name] +Simply use your format type (e.g. `Zip`) to call `Extract()`. You'll pass in a context (for cancellation), the input stream, the list of files you want out of the archive, and a callback function to handle each file. -arc ls caddy_dist.tar.gz -``` +If you want all the files, pass in a nil list of file paths. 
-```txt -drwxr-xr-x matt staff 0 2018-09-19 15:47:18 -0600 MDT dist/ --rw-r--r-- matt staff 6148 2017-08-07 18:34:22 -0600 MDT dist/.DS_Store --rw-r--r-- matt staff 22481 2018-09-19 15:47:18 -0600 MDT dist/CHANGES.txt --rw-r--r-- matt staff 17189 2018-09-19 15:47:18 -0600 MDT dist/EULA.txt --rw-r--r-- matt staff 25261 2016-03-07 16:32:00 -0700 MST dist/LICENSES.txt --rw-r--r-- matt staff 1017 2018-09-19 15:47:18 -0600 MDT dist/README.txt --rw-r--r-- matt staff 288 2016-03-21 11:52:38 -0600 MDT dist/gitcookie.sh.enc -... -``` +```go +// the type that will be used to read the input stream +format := archiver.Zip{} -### Extract a specific file or folder from an archive +// the list of files we want out of the archive; any +// directories will include all their contents unless +// we return fs.SkipDir from our handler +// (leave this nil to walk ALL files from the archive) +fileList := []string{"file1.txt", "subfolder"} -```bash -# Syntax: arc extract [archive name] [path in archive] [destination on disk] +handler := func(ctx context.Context, f archiver.File) error { + // do something with the file + return nil +} -arc extract test.tar.gz foo/hello.txt extracted/hello.txt +err := format.Extract(ctx, input, fileList, handler) +if err != nil { + return err +} ``` -### Compress a single file - -```bash -# Syntax: arc compress [input file] [output file] +### Identifying formats -arc compress test.txt compressed_test.txt.gz -arc compress test.txt gz -``` +Have an input stream with unknown contents? No problem, archiver can identify it for you. It will try matching based on filename and/or the header (which peeks at the stream): -For convenience, the output file (second argument) may simply be a compression format (without leading dot), in which case the output filename will be the same as the input filename but with the format extension appended, and the input file will be deleted if successful. 
+```go +format, err := archiver.Identify("filename.tar.zst", input) +if err != nil { + return err +} +// you can now type-assert format to whatever you need -### Decompress a single file +// want to extract something? +if ex, ok := format.(archiver.Extractor); ok { + // ... proceed to extract +} -```bash -# Syntax: arc decompress [input file] [output file] +// or maybe it's compressed and you want to decompress it? +if decom, ok := format.(archiver.Decompressor); ok { + rc, err := decom.OpenReader(unknownFile) + if err != nil { + return err + } + defer rc.Close() -arc decompress test.txt.gz original_test.txt -arc decompress test.txt.gz + // read from rc to get decompressed data +} ``` -For convenience, the output file (second argument) may be omitted. In that case, the output filename will have the same name as the input filename, but with the compression extension stripped from the end; and the input file will be deleted if successful. - -### Flags - -Flags are specified before the subcommand. Use `arc help` or `arc -h` to get usage help and a description of flags with their default values. - -## Library Use - -The archiver package allows you to easily create and open archives, walk their contents, extract specific files, compress and decompress files, and even stream archives in and out using pure io.Reader and io.Writer interfaces, without ever needing to touch the disk. - -To use as a dependency in your project: +### Virtual file systems -```bash -go get github.com/mholt/archiver/v3 -``` - -```go -import "github.com/mholt/archiver/v3" -``` +This is my favorite feature. -[See the package's GoDoc](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) for full API documentation. +Let's say you have a file. It could be a real directory on disk, an archive, a compressed archive, or any other regular file. You don't really care; you just want to use it uniformly no matter what it is. 
-For example, creating or unpacking an archive file: +Use archiver to simply create a file system: ```go -err := archiver.Archive([]string{"testdata", "other/file.txt"}, "test.zip") -// ... -err = archiver.Unarchive("test.tar.gz", "test") +// filename could be: +// - a folder ("/home/you/Desktop") +// - an archive ("example.zip") +// - a compressed archive ("example.tar.gz") +// - a regular file ("example.txt") +fsys, err := archiver.FileSystem(filename) +if err != nil { + return err +} ``` -The archive format is determined by file extension. (There are [several functions in this package](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) which perform a task by inferring the format from file extension or file header, including `Archive()`, `Unarchive()`, `CompressFile()`, and `DecompressFile()`.) +This is a fully-featured `fs.FS`, so you can open files and read directories, no matter what kind of file the input was. -To configure the archiver used or perform, create an instance of the format's type: +For example, to open a specific file: ```go -z := archiver.Zip{ - CompressionLevel: flate.DefaultCompression, - MkdirAll: true, - SelectiveCompression: true, - ContinueOnError: false, - OverwriteExisting: false, - ImplicitTopLevelFolder: false, +f, err := fsys.Open("file") +if err != nil { + return err } - -err := z.Archive([]string{"testdata", "other/file.txt"}, "/Users/matt/Desktop/test.zip") +defer f.Close() ``` -Inspecting an archive: +If you opened a regular file, you can read from it. 
+ +If you opened a directory, you can list its contents: ```go -err = z.Walk("/Users/matt/Desktop/test.zip", func(f archiver.File) error { - zfh, ok := f.Header.(zip.FileHeader) - if ok { - fmt.Println("Filename:", zfh.Name) +if dir, ok := f.(fs.ReadDirFile); ok { + // 0 gets all entries, but you can pass > 0 to paginate + entries, err := dir.ReadDir(0) + if err != nil { + return err } - return nil -}) + for _, e := range entries { + fmt.Println(e.Name()) + } +} ``` -Streaming files into an archive that is being written to the HTTP response: +Or get a directory listing this way: ```go -err = z.Create(responseWriter) +entries, err := fsys.ReadDir("Playlists") if err != nil { return err } -defer z.Close() +for _, e := range entries { + fmt.Println(e.Name()) +} +``` -for _, fname := range filenames { - info, err := os.Stat(fname) - if err != nil { - return err - } +Or maybe you want to walk all or part of the file system, but skip a folder named `.git`: - // get file's name for the inside of the archive - internalName, err := archiver.NameInArchive(info, fname, fname) - if err != nil { - return err - } - - // open the file - file, err := os.Open(f) +```go +err := fs.WalkDir(fsys, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { return err } - - // write it to the archive - err = z.Write(archiver.File{ - FileInfo: archiver.FileInfo{ - FileInfo: info, - CustomName: internalName, - }, - ReadCloser: file, - }) - file.Close() - if err != nil { - return err + if path == ".git" { + return fs.SkipDir } + fmt.Println("Walking:", path, "Dir?", d.IsDir()) + return nil +}) +if err != nil { + return err } ``` -The `archiver.File` type allows you to use actual files with archives, or to mimic files when you only have streams. - -There's a lot more that can be done, too. [See the GoDoc](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) for full API documentation. 
+### Compress data -**Security note: This package does NOT attempt to mitigate zip-slip attacks.** It is [extremely difficult](https://github.com/rubyzip/rubyzip/pull/376) [to do properly](https://github.com/mholt/archiver/pull/65#issuecomment-395988244) and [seemingly impossible to mitigate effectively across platforms](https://github.com/golang/go/issues/20126). [Attempted fixes have broken processing of legitimate files in production](https://github.com/mholt/archiver/pull/70#issuecomment-423267320), rendering the program unusable. Our recommendation instead is to inspect the contents of an untrusted archive before extracting it (this package provides `Walkers`) and decide if you want to proceed with extraction. +Compression formats let you open writers to compress data: -## Project Values +```go +// wrap underlying writer w +compressor, err := archiver.Zstd{}.OpenWriter(w) +if err != nil { + return err +} +defer compressor.Close() -This project has a few principle-based goals that guide its development: +// writes to compressor will be compressed +``` -- **Do our thing really well.** Our thing is creating, opening, inspecting, compressing, and streaming archive files. It is not meant to be a replacement for specific archive format tools like tar, zip, etc. that have lots of features and customizability. (Some customizability is OK, but not to the extent that it becomes overly complicated or error-prone.) +### Decompress data -- **Have good tests.** Changes should be covered by tests. +Similarly, compression formats let you open readers to decompress data: -- **Limit dependencies.** Keep the package lightweight. +```go +// wrap underlying reader r +decompressor, err := archiver.Brotli{}.OpenReader(r) +if err != nil { + return err +} +defer decompressor.Close() -- **Pure Go.** This means no cgo or other external/system dependencies. 
This package should be able to stand on its own and cross-compile easily to any platform -- and that includes its library dependencies. +// reads from decompressor will be decompressed +``` -- **Idiomatic Go.** Keep interfaces small, variable names semantic, vet shows no errors, the linter is generally quiet, etc. +### Append to tarball -- **Be elegant.** This package should be elegant to use and its code should be elegant when reading and testing. If it doesn't feel good, fix it up. +Tar archives can be appended to without creating a whole new archive by calling `Insert()` on a tar stream. However, this requires that the tarball is not compressed (due to complexities with modifying compression dictionaries). -- **Well-documented.** Use comments prudently; explain why non-obvious code is necessary (and use tests to enforce it). Keep the docs updated, and have examples where helpful. +Here is an example that appends a file to a tarball on disk: -- **Keep it efficient.** This often means keep it simple. Fast code is valuable. +```go +tarball, err := os.OpenFile("example.tar", os.O_RDWR, 0644) +if err != nil { + return err +} +defer tarball.Close() -- **Consensus.** Contributions should ideally be approved by multiple reviewers before being merged. Generally, avoid merging multi-chunk changes that do not go through at least one or two iterations/reviews. Except for trivial changes, PRs are seldom ready to merge right away. +// prepare a text file for the root of the archive +files, err := archiver.FilesFromDisk(map[string]string{ + "/home/you/lastminute.txt": "", +}) -- **Have fun contributing.** Coding is awesome! +err = archiver.Tar{}.Insert(context.Background(), tarball, files) +if err != nil { + return err +} +``` -We welcome contributions and appreciate your efforts! However, please open issues to discuss any changes before spending the time preparing a pull request. This will save time, reduce frustration, and help coordinate the work. Thank you!
diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index f9157124..00000000 --- a/SECURITY.md +++ /dev/null @@ -1,15 +0,0 @@ -# Security Policy - -## Supported Versions - -| Version | Supported | -| ------- | ------------------ | -| >= 3.x | :white_check_mark: | -| < 3.0 | :x: | - -## Reporting a Vulnerability - -Please send the details to both of us: - -- AJ ONeal -- Matthew Holt diff --git a/archiver.go b/archiver.go index 6fdadadc..1ccdd013 100644 --- a/archiver.go +++ b/archiver.go @@ -1,540 +1,182 @@ -// Package archiver facilitates convenient, cross-platform, high-level archival -// and compression operations for a variety of formats and compression algorithms. -// -// This package and its dependencies are written in pure Go (not cgo) and -// have no external dependencies, so they should run on all major platforms. -// (It also comes with a command for CLI use in the cmd/arc folder.) -// -// Each supported format or algorithm has a unique type definition that -// implements the interfaces corresponding to the tasks they perform. For -// example, the Tar type implements Reader, Writer, Archiver, Unarchiver, -// Walker, and several other interfaces. -// -// The most common functions are implemented at the package level for -// convenience: Archive, Unarchive, Walk, Extract, CompressFile, and -// DecompressFile. With these, the format type is chosen implicitly, -// and a sane default configuration is used. -// -// To customize a format's configuration, create an instance of its struct -// with its fields set to the desired values. You can also use and customize -// the handy Default* (replace the wildcard with the format's type name) -// for a quick, one-off instance of the format's type. -// -// To obtain a new instance of a format's struct with the default config, use -// the provided New*() functions. This is not required, however. 
An empty -// struct of any type, for example &Zip{} is perfectly valid, so you may -// create the structs manually, too. The examples on this page show how -// either may be done. -// -// See the examples in this package for an idea of how to wield this package -// for common tasks. Most of the examples which are specific to a certain -// format type, for example Zip, can be applied to other types that implement -// the same interfaces. For example, using Zip is very similar to using Tar -// or TarGz (etc), and using Gz is very similar to using Sz or Xz (etc). -// -// When creating archives or compressing files using a specific instance of -// the format's type, the name of the output file MUST match that of the -// format, to prevent confusion later on. If you absolutely need a different -// file extension, you may rename the file afterward. -// -// Values in this package are NOT safe for concurrent use. There is no -// performance benefit of reusing them, and since they may contain important -// state (especially while walking, reading, or writing), it is NOT -// recommended to reuse values from this package or change their configuration -// after they are in use. package archiver import ( + "context" "fmt" "io" + "io/fs" "os" "path" "path/filepath" - "runtime" "strings" ) -// Archiver is a type that can create an archive file -// from a list of source file names. -type Archiver interface { - ExtensionChecker - - // Archive adds all the files or folders in sources - // to an archive to be created at destination. Files - // are added to the root of the archive, and directories - // are walked and recursively added, preserving folder - // structure. 
- Archive(sources []string, destination string) error -} - -// ExtensionChecker validates file extensions -type ExtensionChecker interface { - CheckExt(name string) error -} - -// FilenameChecker validates filenames to prevent path traversal attacks -type FilenameChecker interface { - CheckPath(to, filename string) error -} - -// Unarchiver is a type that can extract archive files -// into a folder. -type Unarchiver interface { - Unarchive(source, destination string) error -} - -// Writer can write discrete byte streams of files to -// an output stream. -type Writer interface { - Create(out io.Writer) error - Write(f File) error - Close() error -} - -// Reader can read discrete byte streams of files from -// an input stream. -type Reader interface { - Open(in io.Reader, size int64) error - Read() (File, error) - Close() error -} - -// Extractor can extract a specific file from a source -// archive to a specific destination folder on disk. -type Extractor interface { - Extract(source, target, destination string) error -} - -// File provides methods for accessing information about -// or contents of a file within an archive. +// File is a virtualized, generalized file abstraction for interacting with archives. +// It implements the fs.File interface. type File struct { - os.FileInfo + fs.FileInfo - // The original header info; depends on - // type of archive -- could be nil, too. + // The file header as used/provided by the archive format. + // Typically, you do not need to set this field when creating + // an archive. Header interface{} - // Allow the file contents to be read (and closed) - io.ReadCloser -} - -// FileInfo is an os.FileInfo but optionally with -// a custom name, useful if dealing with files that -// are not actual files on disk, or which have a -// different name in an archive than on disk. -type FileInfo struct { - os.FileInfo - CustomName string - // Stores path to the source. - // Used when reading a symlink. 
- SourcePath string -} - -// Name returns fi.CustomName if not empty; -// otherwise it returns fi.FileInfo.Name(). -func (fi FileInfo) Name() string { - if fi.CustomName != "" { - return fi.CustomName - } - return fi.FileInfo.Name() -} - -// ReadFakeCloser is an io.Reader that has -// a no-op close method to satisfy the -// io.ReadCloser interface. -type ReadFakeCloser struct { - io.Reader -} - -// Close implements io.Closer. -func (rfc ReadFakeCloser) Close() error { return nil } - -// Walker can walk an archive file and return information -// about each item in the archive. -type Walker interface { - Walk(archive string, walkFn WalkFunc) error -} - -// WalkFunc is called at each item visited by Walk. -// If an error is returned, the walk may continue -// if the Walker is configured to continue on error. -// The sole exception is the error value ErrStopWalk, -// which stops the walk without an actual error. -type WalkFunc func(f File) error - -// ErrStopWalk signals Walk to break without error. -var ErrStopWalk = fmt.Errorf("walk stopped") - -// ErrFormatNotRecognized is an error that will be -// returned if the file is not a valid archive format. -var ErrFormatNotRecognized = fmt.Errorf("format not recognized") - -// Compressor compresses to out what it reads from in. -// It also ensures a compatible or matching file extension. -type Compressor interface { - ExtensionChecker - Compress(in io.Reader, out io.Writer) error -} + // The path of the file as it appears in the archive. + // This is equivalent to Header.Name (for most Header + // types). We require it to be specified here because + // it is such a common field and we want to preserve + // format-agnosticism (no type assertions) for basic + // operations. + NameInArchive string + + // For symbolic and hard links, the target of the link. + // Not supported by all archive formats. + LinkTarget string + + // A callback function that opens the file to read its + // contents. 
The file must be closed when reading is + // complete. Nil for files that don't have content + // (such as directories and links). + Open func() (io.ReadCloser, error) +} + +func (f File) Stat() (fs.FileInfo, error) { return f.FileInfo, nil } + +// FilesFromDisk returns a list of files by walking the directories in the +// given filenames map. The keys are the names on disk, and the values are +// their associated names in the archive. For convenience, empty values are +// interpreted as the base name of the file (sans path) in the root of the +// archive. Keys that specify directories on disk will be walked and added +// to the archive recursively, rooted at the named directory. Symbolic links +// will be preserved. +// +// This function is primarily used when preparing a list of files to add to +// an archive. +func FilesFromDisk(filenames map[string]string) ([]File, error) { + var files []File + for rootOnDisk, rootInArchive := range filenames { + if rootInArchive == "" { + rootInArchive = filepath.Base(rootOnDisk) + } -// Decompressor decompresses to out what it reads from in. -type Decompressor interface { - Decompress(in io.Reader, out io.Writer) error -} + filepath.WalkDir(rootOnDisk, func(filename string, d fs.DirEntry, err error) error { + if err != nil { + return err + } -// Matcher is a type that can return whether the given -// file appears to match the implementation's format. -// Implementations should return the file's read position -// to where it was when the method was called. -type Matcher interface { - Match(io.ReadSeeker) (bool, error) -} + info, err := d.Info() + if err != nil { + return err + } -// Archive creates an archive of the source files to a new file at destination. -// The archive format is chosen implicitly by file extension.
-func Archive(sources []string, destination string) error { - aIface, err := ByExtension(destination) - if err != nil { - return err - } - a, ok := aIface.(Archiver) - if !ok { - return fmt.Errorf("format specified by destination filename is not an archive format: %s (%T)", destination, aIface) - } - return a.Archive(sources, destination) -} + nameInArchive := path.Join(rootInArchive, strings.TrimPrefix(filename, rootOnDisk)) -// Unarchive unarchives the given archive file into the destination folder. -// The archive format is selected implicitly. -func Unarchive(source, destination string) error { - uaIface, err := ByExtension(source) - if err != nil { - return err - } - u, ok := uaIface.(Unarchiver) - if !ok { - return fmt.Errorf("format specified by source filename is not an archive format: %s (%T)", source, uaIface) - } - return u.Unarchive(source, destination) -} + file := File{ + FileInfo: info, + NameInArchive: nameInArchive, + Open: func() (io.ReadCloser, error) { + return os.Open(filename) + }, + } -// Walk calls walkFn for each file within the given archive file. -// The archive format is chosen implicitly. -func Walk(archive string, walkFn WalkFunc) error { - wIface, err := ByExtension(archive) - if err != nil { - return err - } - w, ok := wIface.(Walker) - if !ok { - return fmt.Errorf("format specified by archive filename is not a walker format: %s (%T)", archive, wIface) - } - return w.Walk(archive, walkFn) -} + // preserve symlinks + if isSymlink(info) { + file.LinkTarget, err = os.Readlink(filename) + if err != nil { + return fmt.Errorf("%s: readlink: %w", filename, err) + } + } -// Extract extracts a single file from the given source archive. If the target -// is a directory, the entire folder will be extracted into destination. The -// archive format is chosen implicitly. 
-func Extract(source, target, destination string) error { - eIface, err := ByExtension(source) - if err != nil { - return err + files = append(files, file) + return nil + }) } - e, ok := eIface.(Extractor) - if !ok { - return fmt.Errorf("format specified by source filename is not an extractor format: %s (%T)", source, eIface) - } - return e.Extract(source, target, destination) + return files, nil } -// CompressFile is a convenience function to simply compress a file. -// The compression algorithm is selected implicitly based on the -// destination's extension. -func CompressFile(source, destination string) error { - cIface, err := ByExtension(destination) - if err != nil { - return err - } - c, ok := cIface.(Compressor) - if !ok { - return fmt.Errorf("format specified by destination filename is not a recognized compression algorithm: %s", destination) - } - return FileCompressor{Compressor: c}.CompressFile(source, destination) -} +// FileHandler is a callback function that is used to handle files as they are read +// from an archive; it is kind of like fs.WalkDirFunc. Handler functions that open +// their files must not overlap or run concurrently, as files may be read from the +// same sequential stream; always close the file before returning. +// +// If the special error value fs.SkipDir is returned, the directory of the file +// (or the file itself if it is a directory) will not be walked. Note that because +// archive contents are not necessarily ordered, skipping directories requires +// memory, and skipping lots of directories may run up your memory bill. +// +// Any other returned error will terminate a walk. +type FileHandler func(ctx context.Context, f File) error -// DecompressFile is a convenience function to simply decompress a file. -// The decompression algorithm is selected implicitly based on the -// source's extension. 
-func DecompressFile(source, destination string) error { - cIface, err := ByExtension(source) +// openAndCopyFile opens file for reading, copies its +// contents to w, then closes file. +func openAndCopyFile(file File, w io.Writer) error { + fileReader, err := file.Open() if err != nil { return err } - c, ok := cIface.(Decompressor) - if !ok { - return fmt.Errorf("format specified by source filename is not a recognized compression algorithm: %s", source) - } - return FileCompressor{Decompressor: c}.DecompressFile(source, destination) -} - -func fileExists(name string) bool { - _, err := os.Stat(name) - return !os.IsNotExist(err) -} - -func mkdir(dirPath string, dirMode os.FileMode) error { - err := os.MkdirAll(dirPath, dirMode) - if err != nil { - return fmt.Errorf("%s: making directory: %v", dirPath, err) - } - return nil -} - -func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) - } - - out, err := os.Create(fpath) - if err != nil { - return fmt.Errorf("%s: creating new file: %v", fpath, err) - } - defer out.Close() - - err = out.Chmod(fm) - if err != nil && runtime.GOOS != "windows" { - return fmt.Errorf("%s: changing file mode: %v", fpath, err) - } - - _, err = io.Copy(out, in) - if err != nil { - return fmt.Errorf("%s: writing file: %v", fpath, err) - } - return nil -} - -func writeNewSymbolicLink(fpath string, target string) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) - } - - _, err = os.Lstat(fpath) - if err == nil { - err = os.Remove(fpath) - if err != nil { - return fmt.Errorf("%s: failed to unlink: %+v", fpath, err) - } - } - - err = os.Symlink(target, fpath) - if err != nil { - return fmt.Errorf("%s: making symbolic link for: %v", fpath, err) - } - return nil + defer fileReader.Close() + _, err = io.Copy(w, 
fileReader) + return err } -func writeNewHardLink(fpath string, target string) error { - err := os.MkdirAll(filepath.Dir(fpath), 0755) - if err != nil { - return fmt.Errorf("%s: making directory for file: %v", fpath, err) +// fileIsIncluded returns true if filename is included according to +// filenameList; meaning it is in the list, its parent folder/path +// is in the list, or the list is nil. +func fileIsIncluded(filenameList []string, filename string) bool { + // include all files if there is no specific list + if filenameList == nil { + return true } + trimmedFilename := strings.TrimSuffix(filename, "/") + for _, fn := range filenameList { + trimmedFn := strings.TrimSuffix(fn, "/") - _, err = os.Lstat(fpath) - if err == nil { - err = os.Remove(fpath) - if err != nil { - return fmt.Errorf("%s: failed to unlink: %+v", fpath, err) + // exact matches are of course included + if trimmedFn == trimmedFilename { + return true } - } - - err = os.Link(target, fpath) - if err != nil { - return fmt.Errorf("%s: making hard link for: %v", fpath, err) - } - return nil -} -func isSymlink(fi os.FileInfo) bool { - return fi.Mode()&os.ModeSymlink != 0 -} - -// within returns true if sub is within or equal to parent. -func within(parent, sub string) bool { - rel, err := filepath.Rel(parent, sub) - if err != nil { - return false - } - return !strings.Contains(rel, "..") -} - -// multipleTopLevels returns true if the paths do not -// share a common top-level folder. -func multipleTopLevels(paths []string) bool { - if len(paths) < 2 { - return false - } - var lastTop string - for _, p := range paths { - p = strings.TrimPrefix(strings.Replace(p, `\`, "/", -1), "/") - for { - next := path.Dir(p) - if next == "." 
{ - break - } - p = next - } - if lastTop == "" { - lastTop = p - } - if p != lastTop { + // also consider the file included if its parent + // folder/path is in the list + if strings.HasPrefix(trimmedFilename, trimmedFn+"/") { return true } } return false } -// folderNameFromFileName returns a name for a folder -// that is suitable based on the filename, which will -// be stripped of its extensions. -func folderNameFromFileName(filename string) string { - base := filepath.Base(filename) - firstDot := strings.Index(base, ".") - if firstDot > -1 { - return base[:firstDot] - } - return base -} - -// makeNameInArchive returns the filename for the file given by fpath to be used within -// the archive. sourceInfo is the FileInfo obtained by calling os.Stat on source, and baseDir -// is an optional base directory that becomes the root of the archive. fpath should be the -// unaltered file path of the file given to a filepath.WalkFunc. -func makeNameInArchive(sourceInfo os.FileInfo, source, baseDir, fpath string) (string, error) { - name := filepath.Base(fpath) // start with the file or dir name - if sourceInfo.IsDir() { - // preserve internal directory structure; that's the path components - // between the source directory's leaf and this file's leaf - dir, err := filepath.Rel(filepath.Dir(source), filepath.Dir(fpath)) - if err != nil { - return "", err - } - // prepend the internal directory structure to the leaf name, - // and convert path separators to forward slashes as per spec - name = path.Join(filepath.ToSlash(dir), name) - } - return path.Join(baseDir, name), nil // prepend the base directory +func isSymlink(info fs.FileInfo) bool { + return info.Mode()&os.ModeSymlink != 0 } -// NameInArchive returns a name for the file at fpath suitable for -// the inside of an archive. The source and its associated sourceInfo -// is the path where walking a directory started, and if no directory -// was walked, source may == fpath. 
The returned name is essentially -// the components of the path between source and fpath, preserving -// the internal directory structure. -func NameInArchive(sourceInfo os.FileInfo, source, fpath string) (string, error) { - return makeNameInArchive(sourceInfo, source, "", fpath) -} +// skipList keeps a list of non-intersecting paths +// as long as its add method is used. Identical +// elements are rejected, more specific paths are +// replaced with broader ones, and more specific +// paths won't be added when a broader one already +// exists in the list. Trailing slashes are ignored. +type skipList []string -// ByExtension returns an archiver and unarchiver, or compressor -// and decompressor, based on the extension of the filename. -func ByExtension(filename string) (interface{}, error) { - var ec interface{} - for _, c := range extCheckers { - if err := c.CheckExt(filename); err == nil { - ec = c - break +func (s *skipList) add(dir string) { + trimmedDir := strings.TrimSuffix(dir, "/") + var dontAdd bool + for i := 0; i < len(*s); i++ { + trimmedElem := strings.TrimSuffix((*s)[i], "/") + if trimmedDir == trimmedElem { + return } - } - switch ec.(type) { - case *Rar: - return NewRar(), nil - case *Tar: - return NewTar(), nil - case *TarBrotli: - return NewTarBrotli(), nil - case *TarBz2: - return NewTarBz2(), nil - case *TarGz: - return NewTarGz(), nil - case *TarLz4: - return NewTarLz4(), nil - case *TarSz: - return NewTarSz(), nil - case *TarXz: - return NewTarXz(), nil - case *TarZstd: - return NewTarZstd(), nil - case *Zip: - return NewZip(), nil - case *Gz: - return NewGz(), nil - case *Bz2: - return NewBz2(), nil - case *Lz4: - return NewLz4(), nil - case *Snappy: - return NewSnappy(), nil - case *Xz: - return NewXz(), nil - case *Zstd: - return NewZstd(), nil - } - return nil, fmt.Errorf("format unrecognized by filename: %s", filename) -} - -// ByHeader returns the unarchiver value that matches the input's -// file header. 
It does not affect the current read position. -// If the file's header is not a recognized archive format, then -// ErrFormatNotRecognized will be returned. -func ByHeader(input io.ReadSeeker) (Unarchiver, error) { - var matcher Matcher - for _, m := range matchers { - ok, err := m.Match(input) - if err != nil { - return nil, fmt.Errorf("matching on format %s: %v", m, err) + // don't add dir if a broader path already exists in the list + if strings.HasPrefix(trimmedDir, trimmedElem+"/") { + dontAdd = true + continue } - if ok { - matcher = m - break + // if dir is broader than a path in the list, remove more specific path in list + if strings.HasPrefix(trimmedElem, trimmedDir+"/") { + *s = append((*s)[:i], (*s)[i+1:]...) + i-- } } - switch matcher.(type) { - case *Zip: - return NewZip(), nil - case *Tar: - return NewTar(), nil - case *Rar: - return NewRar(), nil + if !dontAdd { + *s = append(*s, dir) } - return nil, ErrFormatNotRecognized -} - -// extCheckers is a list of the format implementations -// that can check extensions. Only to be used for -// checking extensions - not any archival operations. 
-var extCheckers = []ExtensionChecker{ - &TarBrotli{}, - &TarBz2{}, - &TarGz{}, - &TarLz4{}, - &TarSz{}, - &TarXz{}, - &TarZstd{}, - &Rar{}, - &Tar{}, - &Zip{}, - &Brotli{}, - &Gz{}, - &Bz2{}, - &Lz4{}, - &Snappy{}, - &Xz{}, - &Zstd{}, -} - -var matchers = []Matcher{ - &Rar{}, - &Tar{}, - &Zip{}, } diff --git a/archiver_test.go b/archiver_test.go index ba8b6e9c..27f10dfe 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -1,591 +1,115 @@ package archiver import ( - "bytes" - "fmt" - "io/ioutil" - "os" - "path/filepath" + "reflect" "testing" - "time" ) -func TestWithin(t *testing.T) { +func TestFileIsIncluded(t *testing.T) { for i, tc := range []struct { - path1, path2 string - expect bool + included []string + candidate string + expect bool }{ { - path1: "/foo", - path2: "/foo/bar", - expect: true, + included: []string{"a"}, + candidate: "a", + expect: true, }, { - path1: "/foo", - path2: "/foobar/asdf", - expect: false, + included: []string{"a", "b", "a/b"}, + candidate: "b", + expect: true, }, { - path1: "/foobar/", - path2: "/foobar/asdf", - expect: true, + included: []string{"a", "b", "c/d"}, + candidate: "c/d/e", + expect: true, }, { - path1: "/foobar/asdf", - path2: "/foobar", - expect: false, + included: []string{"a"}, + candidate: "a/b/c", + expect: true, }, { - path1: "/foobar/asdf", - path2: "/foobar/", - expect: false, + included: []string{"a"}, + candidate: "aa/b/c", + expect: false, }, { - path1: "/", - path2: "/asdf", - expect: true, + included: []string{"a", "b", "c/d"}, + candidate: "b/c", + expect: true, }, { - path1: "/asdf", - path2: "/asdf", - expect: true, + included: []string{"a/"}, + candidate: "a", + expect: true, }, { - path1: "/", - path2: "/", - expect: true, + included: []string{"a"}, + candidate: "a/", + expect: true, }, { - path1: "/foo/bar/daa", - path2: "/foo", - expect: false, - }, - { - path1: "/foo/", - path2: "/foo/bar/daa", - expect: true, + included: []string{"a/"}, + candidate: "a/", + expect: true, }, } { - actual := 
within(tc.path1, tc.path2) + actual := fileIsIncluded(tc.included, tc.candidate) if actual != tc.expect { - t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual) + t.Errorf("Test %d (included=%v candidate=%v): expected %t but got %t", + i, tc.included, tc.candidate, tc.expect, actual) } } } -func TestMultipleTopLevels(t *testing.T) { +func TestSkipList(t *testing.T) { for i, tc := range []struct { - set []string - expect bool + start skipList + add string + expect skipList }{ { - set: []string{}, - expect: false, - }, - { - set: []string{"/foo"}, - expect: false, - }, - { - set: []string{"/foo", "/foo/bar"}, - expect: false, - }, - { - set: []string{"/foo", "/bar"}, - expect: true, - }, - { - set: []string{"/foo", "/foobar"}, - expect: true, + start: skipList{"a", "b", "c"}, + add: "d", + expect: skipList{"a", "b", "c", "d"}, }, { - set: []string{"foo", "foo/bar"}, - expect: false, + start: skipList{"a", "b", "c"}, + add: "b", + expect: skipList{"a", "b", "c"}, }, { - set: []string{"foo", "/foo/bar"}, - expect: false, + start: skipList{"a", "b", "c"}, + add: "b/c", // don't add because b implies b/c + expect: skipList{"a", "b", "c"}, }, { - set: []string{"../foo", "foo/bar"}, - expect: true, + start: skipList{"a", "b", "c"}, + add: "b/c/", // effectively same as above + expect: skipList{"a", "b", "c"}, }, { - set: []string{`C:\foo\bar`, `C:\foo\bar\zee`}, - expect: false, + start: skipList{"a", "b/", "c"}, + add: "b", // effectively same as b/ + expect: skipList{"a", "b/", "c"}, }, { - set: []string{`C:\`, `C:\foo\bar`}, - expect: false, - }, - { - set: []string{`D:\foo`, `E:\foo`}, - expect: true, - }, - { - set: []string{`D:\foo`, `D:\foo\bar`, `C:\foo`}, - expect: true, - }, - { - set: []string{"/foo", "/", "/bar"}, - expect: true, + start: skipList{"a", "b/c", "c"}, + add: "b", // replace b/c because b is broader + expect: skipList{"a", "c", "b"}, }, } { - actual := multipleTopLevels(tc.set) - if actual != tc.expect { - 
t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual) - } - } -} - -func TestMakeNameInArchive(t *testing.T) { - for i, tc := range []struct { - sourceInfo fakeFileInfo - source string // a file path explicitly listed by the user to include in the archive - baseDir string // the base or root directory or path within the archive which contains all other files - fpath string // the file path being walked; if source is a directory, this will be a child path - expect string - }{ - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "foo.txt", - baseDir: "", - fpath: "foo.txt", - expect: "foo.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "foo.txt", - baseDir: "base", - fpath: "foo.txt", - expect: "base/foo.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "foo/bar.txt", - baseDir: "", - fpath: "foo/bar.txt", - expect: "bar.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "foo/bar.txt", - baseDir: "base", - fpath: "foo/bar.txt", - expect: "base/bar.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: true}, - source: "foo/bar", - baseDir: "base", - fpath: "foo/bar", - expect: "base/bar", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "/absolute/path.txt", - baseDir: "", - fpath: "/absolute/path.txt", - expect: "path.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "/absolute/sub/path.txt", - baseDir: "", - fpath: "/absolute/sub/path.txt", - expect: "path.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "/absolute/sub/path.txt", - baseDir: "base", - fpath: "/absolute/sub/path.txt", - expect: "base/path.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: false}, - source: "sub/path.txt", - baseDir: "base/subbase", - fpath: "sub/path.txt", - expect: "base/subbase/path.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: true}, - source: "sub/dir", - baseDir: "base/subbase", - fpath: "sub/dir/path.txt", - expect: "base/subbase/dir/path.txt", - }, - { - 
sourceInfo: fakeFileInfo{isDir: true}, - source: "sub/dir", - baseDir: "base/subbase", - fpath: "sub/dir/sub2/sub3/path.txt", - expect: "base/subbase/dir/sub2/sub3/path.txt", - }, - { - sourceInfo: fakeFileInfo{isDir: true}, - source: `/absolute/dir`, - baseDir: "base", - fpath: `/absolute/dir/sub1/sub2/file.txt`, - expect: "base/dir/sub1/sub2/file.txt", - }, - } { - actual, err := makeNameInArchive(tc.sourceInfo, tc.source, tc.baseDir, tc.fpath) - if err != nil { - t.Errorf("Test %d: Got error: %v", i, err) - } - if actual != tc.expect { - t.Errorf("Test %d: Expected '%s' but got '%s'", i, tc.expect, actual) - } - } -} - -// TODO: We need a new .rar file since we moved the test corpus into the testdata/corpus subfolder. -/* -func TestRarUnarchive(t *testing.T) { - au := DefaultRar - auStr := fmt.Sprintf("%s", au) - - tmp, err := ioutil.TempDir("", "archiver_test") - if err != nil { - t.Fatalf("[%s] %v", auStr, err) - } - defer os.RemoveAll(tmp) - - dest := filepath.Join(tmp, "extraction_test_"+auStr) - os.Mkdir(dest, 0755) - - file := "testdata/sample.rar" - err = au.Unarchive(file, dest) - if err != nil { - t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, file, dest, err) - } - - // Check that what was extracted is what was compressed - // Extracting links isn't implemented yet (in github.com/nwaples/rardecode lib there are no methods to get symlink info) - // Files access modes may differs on different machines, we are comparing extracted(as archive host) and local git clone - symmetricTest(t, auStr, dest, false, false) -} -*/ - -func TestArchiveUnarchive(t *testing.T) { - for _, af := range archiveFormats { - au, ok := af.(archiverUnarchiver) - if !ok { - t.Errorf("%s (%T): not an Archiver and Unarchiver", af, af) - continue - } - testArchiveUnarchive(t, au) - } -} - -func TestArchiveUnarchiveWithFolderPermissions(t *testing.T) { - dir := "testdata/corpus/proverbs/extra" - currentPerms, err := os.Stat(dir) - if err != 
nil { - t.Fatalf("%v", err) - } - err = os.Chmod(dir, 0700) - if err != nil { - t.Fatalf("%v", err) - } - - defer func() { - err := os.Chmod(dir, currentPerms.Mode()) - if err != nil { - t.Fatalf("%v", err) - } - }() - - TestArchiveUnarchive(t) -} - -func testArchiveUnarchive(t *testing.T, au archiverUnarchiver) { - auStr := fmt.Sprintf("%s", au) - - tmp, err := ioutil.TempDir("", "archiver_test") - if err != nil { - t.Fatalf("[%s] %v", auStr, err) - } - defer os.RemoveAll(tmp) - - // Test creating archive - outfile := filepath.Join(tmp, "archiver_test."+auStr) - err = au.Archive([]string{"testdata/corpus"}, outfile) - if err != nil { - t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", auStr, err) - } - - // Test format matching (TODO: Make this its own test, out of band with the archive/unarchive tests) - //testMatching(t, au, outfile) // TODO: Disabled until we can finish implementing this for compressed tar formats - - // Test extracting archive - dest := filepath.Join(tmp, "extraction_test_"+auStr) - _ = os.Mkdir(dest, 0755) - err = au.Unarchive(outfile, dest) - if err != nil { - t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, outfile, dest, err) - } - - // Check that what was extracted is what was compressed - symmetricTest(t, auStr, dest, true, true) -} - -/* -// testMatching tests that au can match the format of archiveFile. 
-func testMatching(t *testing.T, au archiverUnarchiver, archiveFile string) { - m, ok := au.(Matcher) - if !ok { - t.Logf("[NOTICE] %T (%s) is not a Matcher", au, au) - return - } - - file, err := os.Open(archiveFile) - if err != nil { - t.Fatalf("[%s] opening file for matching: %v", au, err) - } - defer file.Close() - - tmpBuf := make([]byte, 2048) - io.ReadFull(file, tmpBuf) - - matched, err := m.Match(file) - if err != nil { - t.Fatalf("%s (%T): testing matching: got error, expected none: %v", m, m, err) - } - if !matched { - t.Fatalf("%s (%T): format should have matched, but didn't", m, m) - } -} -*/ - -// symmetricTest compares the contents of a destination directory to the contents -// of the test corpus and tests that they are equal. -func symmetricTest(t *testing.T, formatName, dest string, testSymlinks, testModes bool) { - var expectedFileCount int - _ = filepath.Walk("testdata/corpus", func(fpath string, info os.FileInfo, err error) error { - if testSymlinks || (info.Mode()&os.ModeSymlink) == 0 { - expectedFileCount++ - } - return nil - }) - - // If outputs equals inputs, we're good; traverse output files - // and compare file names, file contents, and file count. 
- var actualFileCount int - _ = filepath.Walk(dest, func(fpath string, info os.FileInfo, _ error) error { - if fpath == dest { - return nil - } - if testSymlinks || (info.Mode()&os.ModeSymlink) == 0 { - actualFileCount++ - } - - origPath, err := filepath.Rel(dest, fpath) - if err != nil { - t.Fatalf("[%s] %s: Error inducing original file path: %v", formatName, fpath, err) - } - origPath = filepath.Join("testdata", origPath) - - expectedFileInfo, err := os.Lstat(origPath) - if err != nil { - t.Fatalf("[%s] %s: Error obtaining original file info: %v", formatName, fpath, err) - } - if !testSymlinks && (expectedFileInfo.Mode()&os.ModeSymlink) != 0 { - return nil - } - actualFileInfo, err := os.Lstat(fpath) - if err != nil { - t.Fatalf("[%s] %s: Error obtaining actual file info: %v", formatName, fpath, err) - } - - if testModes && actualFileInfo.Mode() != expectedFileInfo.Mode() { - t.Fatalf("[%s] %s: File mode differed between on disk and compressed", formatName, - expectedFileInfo.Mode().String()+" : "+actualFileInfo.Mode().String()) - } + start := make(skipList, len(tc.start)) + copy(start, tc.start) - if info.IsDir() { - // stat dir instead of read file - _, err = os.Stat(origPath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", formatName, - fpath, origPath, err) - } - return nil - } - - if (actualFileInfo.Mode() & os.ModeSymlink) != 0 { - expectedLinkTarget, err := os.Readlink(origPath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't read original symlink target: %v", formatName, origPath, err) - } - actualLinkTarget, err := os.Readlink(fpath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't read actual symlink target: %v", formatName, fpath, err) - } - if expectedLinkTarget != actualLinkTarget { - t.Fatalf("[%s] %s: Symlink targets differed between on disk and compressed", formatName, origPath) - } - return nil - } - - expected, err := ioutil.ReadFile(origPath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't open original file 
(%s) from disk: %v", formatName, - fpath, origPath, err) - } - actual, err := ioutil.ReadFile(fpath) - if err != nil { - t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", formatName, fpath, err) - } + tc.start.add(tc.add) - if !bytes.Equal(expected, actual) { - t.Fatalf("[%s] %s: File contents differed between on disk and compressed", formatName, origPath) + if !reflect.DeepEqual(tc.start, tc.expect) { + t.Errorf("Test %d (start=%v add=%v): expected %v but got %v", + i, start, tc.add, tc.expect, tc.start) } - - return nil - }) - - if got, want := actualFileCount, expectedFileCount; got != want { - t.Fatalf("[%s] Expected %d resulting files, got %d", formatName, want, got) } } - -func TestUnarchiveWithStripComponents(t *testing.T) { - testArchives := []string{ - "testdata/sample.rar", - "testdata/testarchives/evilarchives/evil.zip", - "testdata/testarchives/evilarchives/evil.tar", - "testdata/testarchives/evilarchives/evil.tar.gz", - "testdata/testarchives/evilarchives/evil.tar.bz2", - } - - to := "testdata/testarchives/destarchives/" - - for _, archiveName := range testArchives { - f, err := ByExtension(archiveName) - - if err != nil { - t.Error(err) - } - - var target string - - switch v := f.(type) { - case *Rar: - v.OverwriteExisting = false - v.ImplicitTopLevelFolder = false - v.StripComponents = 1 - target = "quote1.txt" - case *Zip: - case *Tar: - v.OverwriteExisting = false - v.ImplicitTopLevelFolder = false - v.StripComponents = 1 - target = "safefile" - case *TarGz: - case *TarBz2: - v.Tar.OverwriteExisting = false - v.Tar.ImplicitTopLevelFolder = false - v.Tar.StripComponents = 1 - target = "safefile" - } - - u := f.(Unarchiver) - - if err := u.Unarchive(archiveName, to); err != nil { - fmt.Println(err) - } - - if _, err := os.Stat(filepath.Join(to, target)); os.IsNotExist(err) { - t.Errorf("file is incorrectly extracted: %s", target) - } - - os.RemoveAll(to) - } -} - -// test at runtime if the CheckFilename function is behaving properly for the 
archive formats -func TestSafeExtraction(t *testing.T) { - - testArchives := []string{ - "testdata/testarchives/evilarchives/evil.zip", - "testdata/testarchives/evilarchives/evil.tar", - "testdata/testarchives/evilarchives/evil.tar.gz", - "testdata/testarchives/evilarchives/evil.tar.bz2", - } - - for _, archiveName := range testArchives { - - expected := true // 'evilfile' should not be extracted outside of destination directory and 'safefile' should be extracted anyway in the destination folder anyway - - if _, err := os.Stat(archiveName); os.IsNotExist(err) { - t.Errorf("archive not found") - } - - actual := CheckFilenames(archiveName) - - if actual != expected { - t.Errorf("CheckFilename is misbehaving for archive format type %s", filepath.Ext(archiveName)) - } - } -} - -func CheckFilenames(archiveName string) bool { - - evilNotExtracted := false // by default we cannot assume that the path traversal filename is mitigated by CheckFilename - safeExtracted := false // by default we cannot assume that a benign file can be extracted successfully - - // clean the destination folder after this test - defer os.RemoveAll("testdata/testarchives/destarchives/") - - err := Unarchive(archiveName, "testdata/testarchives/destarchives/") - if err != nil { - fmt.Println(err) - } - - // is 'evilfile' prevented to be extracted outside of the destination folder? - if _, err := os.Stat("testdata/testarchives/evilfile"); os.IsNotExist(err) { - evilNotExtracted = true - } - // is 'safefile' safely extracted without errors inside the destination path? 
- if _, err := os.Stat("testdata/testarchives/destarchives/safedir/safefile"); !os.IsNotExist(err) { - safeExtracted = true - } - - return evilNotExtracted && safeExtracted -} - -var archiveFormats = []interface{}{ - DefaultZip, - DefaultTar, - DefaultTarBrotli, - DefaultTarBz2, - DefaultTarGz, - DefaultTarLz4, - DefaultTarSz, - DefaultTarXz, - DefaultTarZstd, -} - -type archiverUnarchiver interface { - Archiver - Unarchiver -} - -type fakeFileInfo struct { - name string - size int64 - mode os.FileMode - modTime time.Time - isDir bool - sys interface{} -} - -func (ffi fakeFileInfo) Name() string { return ffi.name } -func (ffi fakeFileInfo) Size() int64 { return ffi.size } -func (ffi fakeFileInfo) Mode() os.FileMode { return ffi.mode } -func (ffi fakeFileInfo) ModTime() time.Time { return ffi.modTime } -func (ffi fakeFileInfo) IsDir() bool { return ffi.isDir } -func (ffi fakeFileInfo) Sys() interface{} { return ffi.sys } diff --git a/brotli.go b/brotli.go index d594d66f..5d17fae7 100644 --- a/brotli.go +++ b/brotli.go @@ -1,55 +1,42 @@ package archiver import ( - "fmt" "io" - "path/filepath" + "strings" "github.com/andybalholm/brotli" ) +func init() { + RegisterFormat(Brotli{}) +} + // Brotli facilitates brotli compression. type Brotli struct { Quality int } -// Compress reads in, compresses it, and writes it to out. -func (br *Brotli) Compress(in io.Reader, out io.Writer) error { - w := brotli.NewWriterLevel(out, br.Quality) - defer w.Close() - _, err := io.Copy(w, in) - return err -} +func (Brotli) Name() string { return ".br" } -// Decompress reads in, decompresses it, and writes it to out. -func (br *Brotli) Decompress(in io.Reader, out io.Writer) error { - r := brotli.NewReader(in) - _, err := io.Copy(out, r) - return err -} +func (br Brotli) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult -// CheckExt ensures the file extension matches the format. 
-func (br *Brotli) CheckExt(filename string) error { - if filepath.Ext(filename) != ".br" { - return fmt.Errorf("filename must have a .br extension") + // match filename + if strings.Contains(strings.ToLower(filename), br.Name()) { + mr.ByName = true } - return nil -} -func (br *Brotli) String() string { return "brotli" } + // brotli does not have well-defined file headers; the + // best way to match the stream would be to try decoding + // part of it, and this is not implemented for now -// NewBrotli returns a new, default instance ready to be customized and used. -func NewBrotli() *Brotli { - return &Brotli{ - Quality: brotli.DefaultCompression, - } + return mr, nil } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Brotli)) - _ = Decompressor(new(Brotli)) -) +func (br Brotli) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return brotli.NewWriterLevel(w, br.Quality), nil +} -// DefaultBrotli is a default instance that is conveniently ready to use. -var DefaultBrotli = NewBrotli() +func (Brotli) OpenReader(r io.Reader) (io.ReadCloser, error) { + return io.NopCloser(brotli.NewReader(r)), nil +} diff --git a/build.bash b/build.bash deleted file mode 100755 index 225ffc2d..00000000 --- a/build.bash +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -set -ex - -# This script builds archiver for most common platforms. - -export CGO_ENABLED=0 - -cd cmd/arc -GOOS=linux GOARCH=amd64 go build -o ../../builds/arc_linux_amd64 -GOOS=linux GOARCH=arm go build -o ../../builds/arc_linux_arm7 -GOOS=darwin GOARCH=amd64 go build -o ../../builds/arc_mac_amd64 -GOOS=windows GOARCH=amd64 go build -o ../../builds/arc_windows_amd64.exe -cd ../.. 
diff --git a/bz2.go b/bz2.go index 2eb4ac2b..b62192d4 100644 --- a/bz2.go +++ b/bz2.go @@ -1,64 +1,50 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" "github.com/dsnet/compress/bzip2" ) +func init() { + RegisterFormat(Bz2{}) +} + // Bz2 facilitates bzip2 compression. type Bz2 struct { CompressionLevel int } -// Compress reads in, compresses it, and writes it to out. -func (bz *Bz2) Compress(in io.Reader, out io.Writer) error { - w, err := bzip2.NewWriter(out, &bzip2.WriterConfig{ - Level: bz.CompressionLevel, - }) - if err != nil { - return err - } - defer w.Close() - _, err = io.Copy(w, in) - return err -} +func (Bz2) Name() string { return ".bz2" } -// Decompress reads in, decompresses it, and writes it to out. -func (bz *Bz2) Decompress(in io.Reader, out io.Writer) error { - r, err := bzip2.NewReader(in, nil) - if err != nil { - return err +func (bz Bz2) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), bz.Name()) { + mr.ByName = true } - defer r.Close() - _, err = io.Copy(out, r) - return err -} -// CheckExt ensures the file extension matches the format. -func (bz *Bz2) CheckExt(filename string) error { - if filepath.Ext(filename) != ".bz2" { - return fmt.Errorf("filename must have a .bz2 extension") + // match file header + buf := make([]byte, len(bzip2Header)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } - return nil -} + mr.ByStream = bytes.Equal(buf, bzip2Header) -func (bz *Bz2) String() string { return "bz2" } + return mr, nil +} -// NewBz2 returns a new, default instance ready to be customized and used. 
-func NewBz2() *Bz2 { - return &Bz2{ - CompressionLevel: bzip2.DefaultCompression, - } +func (bz Bz2) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return bzip2.NewWriter(w, &bzip2.WriterConfig{ + Level: bz.CompressionLevel, + }) } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Bz2)) - _ = Decompressor(new(Bz2)) -) +func (Bz2) OpenReader(r io.Reader) (io.ReadCloser, error) { + return bzip2.NewReader(r, nil) +} -// DefaultBz2 is a default instance that is conveniently ready to use. -var DefaultBz2 = NewBz2() +var bzip2Header = []byte("BZh") diff --git a/cmd/arc/main.go b/cmd/arc/main.go index fd28ac5f..6b87c0f7 100644 --- a/cmd/arc/main.go +++ b/cmd/arc/main.go @@ -1,376 +1,7 @@ package main -import ( - "archive/tar" - "bytes" - "compress/flate" - "flag" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/klauspost/compress/zip" - "github.com/mholt/archiver/v3" - "github.com/nwaples/rardecode" -) - -var ( - compressionLevel int - overwriteExisting bool - mkdirAll bool - selectiveCompression bool - implicitTopLevelFolder bool - stripComponents int - continueOnError bool - specifyFileType string -) - -var ( - version string - commit string - date string -) - -func init() { - flag.IntVar(&compressionLevel, "level", flate.DefaultCompression, "Compression level") - flag.BoolVar(&overwriteExisting, "overwrite", false, "Overwrite existing files") - flag.BoolVar(&mkdirAll, "mkdirs", false, "Make all necessary directories") - flag.BoolVar(&selectiveCompression, "smart", true, "Only compress files which are not already compressed (zip only)") - flag.BoolVar(&implicitTopLevelFolder, "folder-safe", true, "If an archive does not have a single top-level folder, create one implicitly") - flag.IntVar(&stripComponents, "strip-components", 0, "Strip number of leading paths") - flag.BoolVar(&continueOnError, "allow-errors", true, "Log errors and continue processing") - flag.StringVar(&specifyFileType, "ext", "", 
"specify file type") -} +import "fmt" func main() { - if len(os.Args) >= 2 && - (os.Args[1] == "-h" || os.Args[1] == "--help" || os.Args[1] == "help") { - fmt.Println(usageString()) - os.Exit(0) - } - if len(os.Args) >= 2 && - (os.Args[1] == "-V" || os.Args[1] == "--version" || os.Args[1] == "version") { - fmt.Printf("arc v%s %s (%s)", version, commit, date) - os.Exit(0) - } - if len(os.Args) < 3 { - fatal(usageString()) - } - flag.Parse() - - subcommand := flag.Arg(0) - - // get the format we're working with - iface, err := getFormat(subcommand) - if err != nil { - fatal(err) - } - - // run the desired command - switch subcommand { - case "archive": - a, ok := iface.(archiver.Archiver) - if !ok { - fatalf("the archive command does not support the %s format", iface) - } - - var sources []string - for _, src := range flag.Args()[2:] { - srcs, err := filepath.Glob(src) - if err != nil { - fatalf(err.Error()) - } - sources = append(sources, srcs...) - } - - err = a.Archive(sources, flag.Arg(1)) - - case "unarchive": - u, ok := iface.(archiver.Unarchiver) - if !ok { - fatalf("the unarchive command does not support the %s format", iface) - } - err = u.Unarchive(flag.Arg(1), flag.Arg(2)) - - case "extract": - e, ok := iface.(archiver.Extractor) - if !ok { - fatalf("the extract command does not support the %s format", iface) - } - err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3)) - - case "ls": - w, ok := iface.(archiver.Walker) - if !ok { - fatalf("the ls command does not support the %s format", iface) - } - - var count int - err = w.Walk(flag.Arg(1), func(f archiver.File) error { - count++ - switch h := f.Header.(type) { - case zip.FileHeader: - fmt.Printf("%s\t%d\t%d\t%s\t%s\n", - f.Mode(), - h.Method, - f.Size(), - f.ModTime(), - h.Name, - ) - case *tar.Header: - fmt.Printf("%s\t%s\t%s\t%d\t%s\t%s\n", - f.Mode(), - h.Uname, - h.Gname, - f.Size(), - f.ModTime(), - h.Name, - ) - - case *rardecode.FileHeader: - fmt.Printf("%s\t%d\t%d\t%s\t%s\n", - f.Mode(), - 
int(h.HostOS), - f.Size(), - f.ModTime(), - h.Name, - ) - - default: - fmt.Printf("%s\t%d\t%s\t?/%s\n", - f.Mode(), - f.Size(), - f.ModTime(), - f.Name(), // we don't know full path from this - ) - } - return nil - }) - - fmt.Printf("total %d\n", count) - - case "compress": - c, ok := iface.(archiver.Compressor) - if !ok { - fatalf("the compress command does not support the %s format", iface) - } - fc := archiver.FileCompressor{Compressor: c} - - in := flag.Arg(1) - out := flag.Arg(2) - - var deleteWhenDone bool - if cs, ok := c.(fmt.Stringer); ok && out == cs.String() { - out = in + "." + out - deleteWhenDone = true - } - - err = fc.CompressFile(in, out) - if err == nil && deleteWhenDone { - err = os.Remove(in) - } - - case "decompress": - c, ok := iface.(archiver.Decompressor) - if !ok { - fatalf("the compress command does not support the %s format", iface) - } - fc := archiver.FileCompressor{Decompressor: c} - - in := flag.Arg(1) - out := flag.Arg(2) - - var deleteWhenDone bool - if cs, ok := c.(fmt.Stringer); ok && out == "" { - out = strings.TrimSuffix(in, "."+cs.String()) - deleteWhenDone = true - } - - err = fc.DecompressFile(in, out) - if err == nil && deleteWhenDone { - err = os.Remove(in) - } - - default: - fatalf("unrecognized command: %s", flag.Arg(0)) - } - if err != nil { - fatal(err) - } -} - -func getFormat(subcommand string) (interface{}, error) { - // prepare the filename, with which we will find a suitable format - formatPos := 1 - if subcommand == "compress" { - formatPos = 2 - } - filename := flag.Arg(formatPos) - if subcommand == "compress" && !strings.Contains(filename, ".") { - filename = "." + filename // leading dot needed for extension matching - } - - // get the format by filename extension - if specifyFileType != "" { - filename = "." 
+ specifyFileType - } - f, err := archiver.ByExtension(filename) - if err != nil { - return nil, err - } - - // prepare a single Tar, in case it's needed - mytar := &archiver.Tar{ - OverwriteExisting: overwriteExisting, - MkdirAll: mkdirAll, - ImplicitTopLevelFolder: implicitTopLevelFolder, - StripComponents: stripComponents, - ContinueOnError: continueOnError, - } - - // fully configure the new value - switch v := f.(type) { - case *archiver.Rar: - v.OverwriteExisting = overwriteExisting - v.MkdirAll = mkdirAll - v.ImplicitTopLevelFolder = implicitTopLevelFolder - v.StripComponents = stripComponents - v.ContinueOnError = continueOnError - v.Password = os.Getenv("ARCHIVE_PASSWORD") - case *archiver.Tar: - f = mytar - case *archiver.TarBrotli: - v.Tar = mytar - v.Quality = compressionLevel - case *archiver.TarBz2: - v.Tar = mytar - v.CompressionLevel = compressionLevel - case *archiver.TarGz: - v.Tar = mytar - v.CompressionLevel = compressionLevel - case *archiver.TarLz4: - v.Tar = mytar - v.CompressionLevel = compressionLevel - case *archiver.TarSz: - v.Tar = mytar - case *archiver.TarXz: - v.Tar = mytar - case *archiver.TarZstd: - v.Tar = mytar - case *archiver.Zip: - v.CompressionLevel = compressionLevel - v.OverwriteExisting = overwriteExisting - v.MkdirAll = mkdirAll - v.SelectiveCompression = selectiveCompression - v.ImplicitTopLevelFolder = implicitTopLevelFolder - v.StripComponents = stripComponents - v.ContinueOnError = continueOnError - case *archiver.Gz: - v.CompressionLevel = compressionLevel - case *archiver.Brotli: - v.Quality = compressionLevel - case *archiver.Bz2: - v.CompressionLevel = compressionLevel - case *archiver.Lz4: - v.CompressionLevel = compressionLevel - case *archiver.Snappy: - // nothing to customize - case *archiver.Xz: - // nothing to customize - case *archiver.Zstd: - // nothing to customize - default: - return nil, fmt.Errorf("format does not support customization: %s", f) - } - - return f, nil + fmt.Println("TODO: not yet 
implemented for archiver v4; use v3 for now") } - -func fatal(v ...interface{}) { - fmt.Fprintln(os.Stderr, v...) - os.Exit(1) -} - -func fatalf(s string, v ...interface{}) { - fmt.Fprintf(os.Stderr, s+"\n", v...) - os.Exit(1) -} - -func usageString() string { - buf := new(bytes.Buffer) - buf.WriteString(usage) - flag.CommandLine.SetOutput(buf) - flag.CommandLine.PrintDefaults() - return buf.String() -} - -const usage = `Usage: arc {archive|unarchive|extract|ls|compress|decompress|help} [arguments...] - archive - Create a new archive file. List the files/folders - to include in the archive; at least one required. - unarchive - Extract an archive file. Provide the archive to - open and the destination folder to extract into. - extract - Extract a single file or folder (recursively) from - an archive. First argument is the source archive, - second is the file to extract (exact path within the - archive is required), and third is destination. - ls - List the contents of the archive. - compress - Compresses a file, destination optional. - decompress - Decompresses a file, destination optional. - help - Display this help text. Also -h or --help. - - SPECIFYING THE ARCHIVE FORMAT - The format of the archive is determined by its - file extension*. Supported extensions: - .zip - .tar - .tar.br - .tbr - .tar.gz - .tgz - .tar.bz2 - .tbz2 - .tar.xz - .txz - .tar.lz4 - .tlz4 - .tar.sz - .tsz - .zst - .tar.zst - .rar (open only) - .bz2 - .gz - .lz4 - .sz - .xz - - *use flag --ext to manually set filetype. example: --ext=tar.gz - - (DE)COMPRESSING SINGLE FILES - Some formats are compression-only, and can be used - with the compress and decompress commands on a - single file; they do not bundle multiple files. - - To replace a file when compressing, specify the - source file name for the first argument, and the - compression format (without leading dot) for the - second argument. To replace a file when decompressing, - specify only the source file and no destination. 
- - PASSWORD-PROTECTED RAR FILES - Export the ARCHIVE_PASSWORD environment variable - to be able to open password-protected rar archives. - - GLOBAL FLAG REFERENCE - The following global flags may be used before the - sub-command (some flags are format-specific): - -` diff --git a/doc_test.go b/doc_test.go deleted file mode 100644 index aadcc2d2..00000000 --- a/doc_test.go +++ /dev/null @@ -1,260 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "log" - "net/http" - "os" - "strconv" -) - -// The simplest use of this package: create an archive file -// from a list of filenames. This is the recommended way to -// do so using a default configuration, as it guarantees -// the file format matches the file extension, because the -// format to write is determined by the given extension. -func ExampleArchive() { - // any files in this list are added - // to the top level of the archive; - // directories are recursively added - files := []string{ - "index.html", - "photo.jpg", - "blog", // directory - "/home/website/copyright.txt", - } - - // archive format is determined by file extension - err := Archive(files, "blog_site.zip") - if err != nil { - log.Fatal(err) - } -} - -// The simplest use of this package: extract all of an archive's -// contents to a folder on disk using the default configuration. -// The archive format is determined automatically. -func ExampleUnarchive() { - err := Unarchive("blog_site.zip", "extracted/mysite") - if err != nil { - log.Fatal(err) - } -} - -// In this example, the DefaultZip is being customized so that -// all calls to its methods will use that configuration. -func ExampleZip_default() { - DefaultZip.OverwriteExisting = true - DefaultZip.ImplicitTopLevelFolder = true - // any subsequent use of DefaultZip uses - // this modified configuration -} - -// Here we create our own instance of the Zip format. No need -// to use the constructor function (NewZip) or the default -// instance (DefaultZip) if we do not want to. 
Instantiating -// the type like this allows us to easily be very explicit -// about our configuration. -func ExampleZip_custom() { - z := &Zip{ - CompressionLevel: 3, - OverwriteExisting: false, - MkdirAll: true, - SelectiveCompression: true, - ImplicitTopLevelFolder: true, - ContinueOnError: false, - } - // z is now ready to use for whatever (this is a dumb example) - fmt.Println(z.CheckExt("test.zip")) -} - -// Much like the package-level Archive function, this creates an -// archive using the configuration of the Zip instance it is called -// on. The output filename must match the format's recognized file -// extension(s). -func ExampleZip_Archive() { - err := DefaultZip.Archive([]string{"..."}, "example.zip") - if err != nil { - log.Fatal(err) - } -} - -// It's easy to list the items in an archive. This example -// prints the name and size of each file in the archive. Like -// other top-level functions in this package, the format is -// inferred automatically for you. -func ExampleWalk() { - err := Walk("example.tar.gz", func(f File) error { - fmt.Println(f.Name(), f.Size()) - // you could also read the contents; f is an io.Reader! - return nil - }) - if err != nil { - log.Fatal(err) - } -} - -// This example extracts target.txt from inside example.rar -// and puts it into a folder on disk called output/dir. -func ExampleExtract() { - err := Extract("example.rar", "target.txt", "output/dir") - if err != nil { - log.Fatal(err) - } -} - -// This example demonstrates how to read an -// archive in a streaming fashion. The idea -// is that you can stream the bytes of an -// archive from a stream, regardless of -// whether it is an actual file on disk. -// This means that you can read a huge -// archive file-by-file rather than having -// to store it all on disk first. In this -// example, we read a hypothetical archive -// from a (fake) HTTP request body and -// print its file names and sizes. The -// files can be read, of course, but they -// do not have to be. 
-func ExampleZip_streamingRead() { - // for the sake of the example compiling, pretend we have an HTTP request - req := new(http.Request) - contentLen, err := strconv.Atoi(req.Header.Get("Content-Length")) - if err != nil { - log.Fatal(err) - } - - // the Zip format requires knowing the length of the stream, - // but other formats don't generally require it, so it - // could be left as 0 when using those - err = DefaultZip.Open(req.Body, int64(contentLen)) - if err != nil { - log.Fatal(err) - } - defer DefaultZip.Close() - - // Note that DefaultZip now contains some state that - // is critical to reading the stream until it is closed, - // so do not reuse it until then. - - // iterate each file in the archive until EOF - for { - f, err := DefaultZip.Read() - if err == io.EOF { - break - } - if err != nil { - log.Fatal(err) - } - - // f is an io.ReadCloser, so you can read its contents - // if you wish; or you can access its header info through - // f.Header or the embedded os.FileInfo - fmt.Println("File name:", f.Name(), "File size:", f.Size()) - - // be sure to close f before moving on!! - err = f.Close() - if err != nil { - log.Fatal(err) - } - } -} - -// This example demonstrates how to write an -// archive in a streaming fashion. The idea -// is that you can stream the bytes of a new -// archive that is created on-the-fly from -// generic streams. Those streams could be -// actual files on disk, or they could be over -// a network, or standard output, or any other -// io.Reader/io.Writer. This example only adds -// one file to the archive and writes the -// resulting archive to standard output, but you -// could add as many files as needed with a loop. -func ExampleZip_streamingWrite() { - err := DefaultZip.Create(os.Stdout) - if err != nil { - log.Fatal(err) - } - defer DefaultZip.Close() - - // Note that DefaultZip now contains state - // critical to a successful write until it - // is closed, so don't reuse it for anything - // else until then. 
- - // At this point, you can open an actual file - // to add to the archive, or the "file" could - // come from any io.ReadCloser stream. If you - // only have an io.Reader, you can use - // ReadFakeCloser to make it into an - // io.ReadCloser. - - // The next part is a little tricky if you - // don't have an actual file because you will - // need an os.FileInfo. Fortunately, that's an - // interface! So go ahead and implement it in - // whatever way makes the most sense to you. - // You'll also need to give the file a name - // for within the archive. In this example, - // we'll open a real file. - - file, err := os.Open("foo.txt") - if err != nil { - log.Fatal(err) - } - defer file.Close() - fileInfo, err := file.Stat() - if err != nil { - log.Fatal(err) - } - - err = DefaultZip.Write(File{ - FileInfo: FileInfo{ - FileInfo: fileInfo, - CustomName: "name/in/archive.txt", - }, - ReadCloser: file, // does not have to be an actual file - }) - if err != nil { - log.Fatal(err) - } -} - -// This example compresses a standard tar file into a tar.gz file. -// Compression formats are selected by file extension. -func ExampleCompressFile() { - err := CompressFile("example.tar", "example.tar.gz") - if err != nil { - log.Fatal(err) - } -} - -// This example changes the default configuration for -// the Gz compression format. -func ExampleCompressFile_custom() { - DefaultGz.CompressionLevel = 5 - // any calls to DefaultGz now use the modified configuration -} - -// This example creates a new Gz instance and -// uses it to compress a stream, writing to -// another stream. This is sometimes preferable -// over modifying the DefaultGz. -func ExampleGz_Compress_custom() { - gz := &Gz{CompressionLevel: 5} - err := gz.Compress(os.Stdin, os.Stdout) - if err != nil { - log.Fatal(err) - } -} - -// This example decompresses a gzipped tarball and writes -// it to an adjacent file. 
-func ExampleDecompressFile() { - err := DecompressFile("example.tar.gz", "example.tar") - if err != nil { - log.Fatal(err) - } -} diff --git a/error.go b/error.go deleted file mode 100644 index a46235c6..00000000 --- a/error.go +++ /dev/null @@ -1,27 +0,0 @@ -package archiver - -import ( - "fmt" - "strings" -) - -// IllegalPathError is an error returned when an illegal -// path is detected during the archival process. -// -// By default, only the Filename is showed on error, but you might -// also get the absolute value of the invalid path on the AbsolutePath -// field. -type IllegalPathError struct { - AbsolutePath string - Filename string -} - -func (err *IllegalPathError) Error() string { - return fmt.Sprintf("illegal file path: %s", err.Filename) -} - -// IsIllegalPathError returns true if the provided error is of -// the type IllegalPathError. -func IsIllegalPathError(err error) bool { - return err != nil && strings.Contains(err.Error(), "illegal file path: ") -} diff --git a/error_test.go b/error_test.go deleted file mode 100644 index d47ed85f..00000000 --- a/error_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package archiver_test - -import ( - "errors" - "fmt" - "os" - "testing" - - "github.com/mholt/archiver/v3" -) - -func TestIllegalPathErrorString(t *testing.T) { - tests := []struct { - instance *archiver.IllegalPathError - expected string - }{ - {instance: &archiver.IllegalPathError{Filename: "foo.txt"}, expected: "illegal file path: foo.txt"}, - {instance: &archiver.IllegalPathError{AbsolutePath: "/tmp/bar.txt", Filename: "bar.txt"}, expected: "illegal file path: bar.txt"}, - } - - for i, test := range tests { - test := test - - t.Run(fmt.Sprintf("Case %d", i), func(t *testing.T) { - if test.expected != test.instance.Error() { - t.Fatalf("Excepected '%s', but got '%s'", test.expected, test.instance.Error()) - } - }) - } -} - -func TestIsIllegalPathError(t *testing.T) { - tests := []struct { - instance error - expected bool - }{ - {instance: nil, expected: 
false}, - {instance: os.ErrNotExist, expected: false}, - {instance: fmt.Errorf("some error"), expected: false}, - {instance: errors.New("another error"), expected: false}, - {instance: &archiver.IllegalPathError{Filename: "foo.txt"}, expected: true}, - } - - for i, test := range tests { - test := test - - t.Run(fmt.Sprintf("Case %d", i), func(t *testing.T) { - actual := archiver.IsIllegalPathError(test.instance) - if actual != test.expected { - t.Fatalf("Excepected '%v', but got '%v'", test.expected, actual) - } - }) - } -} diff --git a/filecompressor.go b/filecompressor.go deleted file mode 100644 index ab1fd3b8..00000000 --- a/filecompressor.go +++ /dev/null @@ -1,67 +0,0 @@ -package archiver - -import ( - "fmt" - "os" -) - -// FileCompressor can compress and decompress single files. -type FileCompressor struct { - Compressor - Decompressor - - // Whether to overwrite existing files when creating files. - OverwriteExisting bool -} - -// CompressFile reads the source file and compresses it to destination. -// The destination must have a matching extension. -func (fc FileCompressor) CompressFile(source, destination string) error { - if err := fc.CheckExt(destination); err != nil { - return err - } - if fc.Compressor == nil { - return fmt.Errorf("no compressor specified") - } - if !fc.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file exists: %s", destination) - } - - in, err := os.Open(source) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Create(destination) - if err != nil { - return err - } - defer out.Close() - - return fc.Compress(in, out) -} - -// DecompressFile reads the source file and decompresses it to destination. 
-func (fc FileCompressor) DecompressFile(source, destination string) error { - if fc.Decompressor == nil { - return fmt.Errorf("no decompressor specified") - } - if !fc.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file exists: %s", destination) - } - - in, err := os.Open(source) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Create(destination) - if err != nil { - return err - } - defer out.Close() - - return fc.Decompress(in, out) -} diff --git a/filecompressor_test.go b/filecompressor_test.go deleted file mode 100644 index 44b27d7d..00000000 --- a/filecompressor_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package archiver - -import ( - "fmt" - "io/ioutil" - "os" - "path/filepath" - "testing" -) - -func TestCheckExtension(t *testing.T) { - testdir, err := ioutil.TempDir("", "archiver_checkext_test_") - if err != nil { - t.Fatalf("Making temporary directory: %v", err) - } - defer os.RemoveAll(testdir) - testfile, err := ioutil.TempFile(testdir, "compressor_test_input_*.txt") - if err != nil { - t.Fatalf("Making temporary file: %v", err) - } - defer os.Remove(testfile.Name()) - defer testfile.Close() - - for i, tc := range []struct { - checker ExtensionChecker - ext string // including leading dot - shouldErr bool - }{ - {checker: NewBz2(), ext: ".bz2", shouldErr: false}, - {checker: NewBz2(), ext: ".gz", shouldErr: true}, - - {checker: NewGz(), ext: ".gz", shouldErr: false}, - {checker: NewGz(), ext: ".sz", shouldErr: true}, - - {checker: NewLz4(), ext: ".lz4", shouldErr: false}, - {checker: NewLz4(), ext: ".xz", shouldErr: true}, - - {checker: NewSnappy(), ext: ".sz", shouldErr: false}, - {checker: NewSnappy(), ext: ".lz4", shouldErr: true}, - - {checker: NewXz(), ext: ".xz", shouldErr: false}, - {checker: NewXz(), ext: ".bz2", shouldErr: true}, - - {checker: NewZip(), ext: ".zip", shouldErr: false}, - {checker: NewZip(), ext: ".zip.gz", shouldErr: true}, - {checker: NewZip(), ext: ".tgz", shouldErr: true}, - {checker: 
NewZip(), ext: ".gz", shouldErr: true}, - - {checker: NewTar(), ext: ".tar", shouldErr: false}, - {checker: NewTar(), ext: ".zip", shouldErr: true}, - {checker: NewTar(), ext: ".tar.gz", shouldErr: true}, - {checker: NewTar(), ext: ".tgz", shouldErr: true}, - - {checker: NewTarBz2(), ext: ".tar.bz2", shouldErr: false}, - {checker: NewTarBz2(), ext: ".tbz2", shouldErr: false}, - {checker: NewTarBz2(), ext: ".zip", shouldErr: true}, - {checker: NewTarBz2(), ext: ".tar", shouldErr: true}, - {checker: NewTarBz2(), ext: ".bz2", shouldErr: true}, - - {checker: NewTarGz(), ext: ".tar.gz", shouldErr: false}, - {checker: NewTarGz(), ext: ".tgz", shouldErr: false}, - {checker: NewTarGz(), ext: ".zip", shouldErr: true}, - {checker: NewTarGz(), ext: ".tar", shouldErr: true}, - {checker: NewTarGz(), ext: ".gz", shouldErr: true}, - - {checker: NewTarLz4(), ext: ".tar.lz4", shouldErr: false}, - {checker: NewTarLz4(), ext: ".tlz4", shouldErr: false}, - {checker: NewTarLz4(), ext: ".zip", shouldErr: true}, - {checker: NewTarLz4(), ext: ".tar", shouldErr: true}, - {checker: NewTarLz4(), ext: ".lz4", shouldErr: true}, - - {checker: NewTarSz(), ext: ".tar.sz", shouldErr: false}, - {checker: NewTarSz(), ext: ".tsz", shouldErr: false}, - {checker: NewTarSz(), ext: ".zip", shouldErr: true}, - {checker: NewTarSz(), ext: ".tar", shouldErr: true}, - {checker: NewTarSz(), ext: ".sz", shouldErr: true}, - - {checker: NewTarXz(), ext: ".tar.xz", shouldErr: false}, - {checker: NewTarXz(), ext: ".txz", shouldErr: false}, - {checker: NewTarXz(), ext: ".zip", shouldErr: true}, - {checker: NewTarXz(), ext: ".tar", shouldErr: true}, - {checker: NewTarXz(), ext: ".xz", shouldErr: true}, - } { - err := tc.checker.CheckExt("test" + tc.ext) - if tc.shouldErr && err == nil { - t.Errorf("Test %d [%s - %s]: Expected an error when checking extension, but got none", - i, tc.checker, tc.ext) - } - if !tc.shouldErr && err != nil { - t.Errorf("Test %d [%s - %s]: Did not expect an error when checking extension, 
but got: %v", - i, tc.checker, tc.ext, err) - } - - // also ensure that methods which create files check the extension, - // to avoid confusion where the extension indicates one format but - // actual format is another - if a, ok := tc.checker.(Archiver); ok { - filename := fmt.Sprintf("test%d_archive%s", i, tc.ext) - err := a.Archive(nil, filepath.Join(testdir, filename)) - if tc.shouldErr && err == nil { - t.Errorf("Test %d [%s - %s]: Archive(): Expected an error with filename '%s' but got none", - i, tc.checker, tc.ext, filename) - } - if !tc.shouldErr && err != nil { - t.Errorf("Test %d [%s - %s]: Archive(): Did not expect an error with filename '%s', but got: %v", - i, tc.checker, tc.ext, filename, err) - } - } - if c, ok := tc.checker.(FileCompressor); ok { - filename := fmt.Sprintf("test%d_compress%s", i, tc.ext) - err := c.CompressFile(testfile.Name(), filepath.Join(testdir, filename)) - if tc.shouldErr && err == nil { - t.Errorf("Test %d [%s - %s]: Compress(): Expected an error with filename '%s' but got none", - i, tc.checker, tc.ext, filename) - } - if !tc.shouldErr && err != nil { - t.Errorf("Test %d [%s - %s]: Compress(): Did not expect an error with filename '%s', but got: %v", - i, tc.checker, tc.ext, filename, err) - } - } - } -} diff --git a/formats.go b/formats.go new file mode 100644 index 00000000..8c325d30 --- /dev/null +++ b/formats.go @@ -0,0 +1,238 @@ +package archiver + +import ( + "context" + "fmt" + "io" + "strings" +) + +// RegisterFormat registers a format. It should be called during init. +// Duplicate formats by name are not allowed and will panic. +func RegisterFormat(format Format) { + name := strings.Trim(strings.ToLower(format.Name()), ".") + if _, ok := formats[name]; ok { + panic("format " + name + " is already registered") + } + formats[name] = format +} + +// Identify iterates the registered formats and returns the one that +// matches the given filename and/or stream. 
It is capable of identifying +// compressed files (.gz, .xz...), archive files (.tar, .zip...), and +// compressed archive files (tar.gz, tar.bz2...). The returned Format +// value can be type-asserted to ascertain its capabilities. +// +// If no matching formats were found, special error ErrNoMatch is returned. +func Identify(filename string, stream io.ReadSeeker) (Format, error) { + var compression Compression + var archival Archival + + // try compression format first, since that's the outer "layer" + for name, format := range formats { + cf, isCompression := format.(Compression) + if !isCompression { + continue + } + + matchResult, err := identifyOne(format, filename, stream, nil) + if err != nil { + return nil, fmt.Errorf("matching %s: %w", name, err) + } + + // if matched, wrap input stream with decompression + // so we can see if it contains an archive within + if matchResult.Matched() { + compression = cf + break + } + } + + // try archive format next + for name, format := range formats { + af, isArchive := format.(Archival) + if !isArchive { + continue + } + + matchResult, err := identifyOne(format, filename, stream, compression) + if err != nil { + return nil, fmt.Errorf("matching %s: %w", name, err) + } + + if matchResult.Matched() { + archival = af + break + } + } + + switch { + case compression != nil && archival == nil: + return compression, nil + case compression == nil && archival != nil: + return archival, nil + case compression != nil && archival != nil: + return CompressedArchive{compression, archival}, nil + default: + return nil, ErrNoMatch + } +} + +func identifyOne(format Format, filename string, stream io.ReadSeeker, comp Compression) (MatchResult, error) { + // reset stream position to beginning, then restore current position when done + previousOffset, err := stream.Seek(0, io.SeekCurrent) + if err != nil { + return MatchResult{}, err + } + _, err = stream.Seek(0, io.SeekStart) + if err != nil { + return MatchResult{}, err + } + defer 
stream.Seek(previousOffset, io.SeekStart) + + // if looking within a compressed format, wrap the stream in a + // reader that can decompress it so we can match the "inner" format + // (yes, we have to make a new reader every time we do a match, + // because we reset/seek the stream each time and that can mess up + // the compression reader's state if we don't discard it also) + if comp != nil { + decompressedStream, err := comp.OpenReader(stream) + if err != nil { + return MatchResult{}, err + } + defer decompressedStream.Close() + stream = struct { + io.Reader + io.Seeker + }{ + Reader: decompressedStream, + Seeker: stream, + } + } + + return format.Match(filename, stream) +} + +// CompressedArchive combines a compression format on top of an archive +// format (e.g. "tar.gz") and provides both functionalities in a single +// type. It ensures that archive functions are wrapped by compressors and +// decompressors. However, compressed archives have some limitations; for +// example, files cannot be inserted/appended because of complexities with +// modifying existing compression state (perhaps this could be overcome, +// but I'm not about to try it). +// +// As this type is intended to compose compression and archive formats, +// both must be specified in order for this value to be valid, or its +// methods will return errors. +type CompressedArchive struct { + Compression + Archival +} + +// Name returns a concatenation of the archive format name +// and the compression format name. +func (caf CompressedArchive) Name() string { + if caf.Compression == nil && caf.Archival == nil { + panic("missing both compression and archive formats") + } + var name string + if caf.Archival != nil { + name += caf.Archival.Name() + } + if caf.Compression != nil { + name += caf.Compression.Name() + } + return name +} + +// Match matches if the input matches both the compression and archive format. 
+func (caf CompressedArchive) Match(filename string, stream io.Reader) (MatchResult, error) { + var conglomerate MatchResult + + if caf.Compression != nil { + matchResult, err := caf.Compression.Match(filename, stream) + if err != nil { + return MatchResult{}, err + } + if !matchResult.Matched() { + return matchResult, nil + } + + // wrap the reader with the decompressor so we can + // attempt to match the archive by reading the stream + rc, err := caf.Compression.OpenReader(stream) + if err != nil { + return matchResult, err + } + defer rc.Close() + stream = rc + + conglomerate = matchResult + } + + if caf.Archival != nil { + matchResult, err := caf.Archival.Match(filename, stream) + if err != nil { + return MatchResult{}, err + } + if !matchResult.Matched() { + return matchResult, nil + } + conglomerate.ByName = conglomerate.ByName || matchResult.ByName + conglomerate.ByStream = conglomerate.ByStream || matchResult.ByStream + } + + return conglomerate, nil +} + +// Archive adds files to the output archive while compressing the result. +func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []File) error { + if caf.Compression != nil { + wc, err := caf.Compression.OpenWriter(output) + if err != nil { + return err + } + defer wc.Close() + output = wc + } + return caf.Archival.Archive(ctx, output, files) +} + +// Extract reads files out of an archive while decompressing the results. +func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { + if caf.Compression != nil { + rc, err := caf.Compression.OpenReader(sourceArchive) + if err != nil { + return err + } + defer rc.Close() + sourceArchive = rc + } + return caf.Archival.(Extractor).Extract(ctx, sourceArchive, pathsInArchive, handleFile) +} + +// MatchResult returns true if the format was matched either +// by name, stream, or both. 
Name usually refers to matching +// by file extension, and stream usually refers to reading +// the first few bytes of the stream (its header). A stream +// match is generally stronger, as filenames are not always +// indicative of their contents if they even exist at all. +type MatchResult struct { + ByName, ByStream bool +} + +// Matched returns true if a match was made by either name or stream. +func (mr MatchResult) Matched() bool { return mr.ByName || mr.ByStream } + +// ErrNoMatch is returned if there are no matching formats. +var ErrNoMatch = fmt.Errorf("no formats matched") + +// Registered formats. +var formats = make(map[string]Format) + +// Interface guards +var ( + _ Format = (*CompressedArchive)(nil) + _ Archiver = (*CompressedArchive)(nil) + _ Extractor = (*CompressedArchive)(nil) +) diff --git a/fs.go b/fs.go new file mode 100644 index 00000000..f7fc30b2 --- /dev/null +++ b/fs.go @@ -0,0 +1,403 @@ +package archiver + +import ( + "context" + "errors" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "runtime" + "strings" +) + +// FileSystem opens the file at root as a read-only file system. The root may be a +// path to a directory, archive file, compressed archive file or any other file on +// disk. +// +// If root is a directory, its contents are accessed directly from the disk's file system. +// If root is an archive file, its contents can be accessed like a normal directory; +// compressed archive files are transparently decompressed as contents are accessed. +// And if root is any other file, it is the only file in the returned file system. +// +// This method essentially offers uniform read access to various kinds of files: +// directories, archives, compressed archives, and individual files are all treated +// the same way. 
+func FileSystem(root string) (fs.ReadDirFS, error) { + info, err := os.Stat(root) + if err != nil { + return nil, err + } + + // real folders can be accessed easily + if info.IsDir() { + return DirFS(root), nil + } + + // if any archive formats recognize this file, access it like a folder + file, err := os.Open(root) + if err != nil { + return nil, err + } + defer file.Close() + format, err := Identify(filepath.Base(root), file) + if err != nil && !errors.Is(err, ErrNoMatch) { + return nil, err + } + if format != nil { + if af, ok := format.(Archival); ok { + return ArchiveFS{Path: root, Format: af}, nil + } + } + + // otherwise consider it an ordinary file; make a file system with it as its only file + return FileFS(root), nil +} + +// DirFS allows accessing a directory on disk with a consistent file system interface. +// It is almost the same as os.DirFS, except for some reason os.DirFS only implements +// Open() and Stat(), but we also need ReadDir(). Seems like an obvious miss (as of Go 1.17) +// and I have questions: https://twitter.com/mholt6/status/1476058551432876032 +type DirFS string + +func (f DirFS) Open(name string) (fs.File, error) { + if err := f.checkName(name, "open"); err != nil { + return nil, err + } + return os.Open(filepath.Join(string(f), name)) +} + +func (f DirFS) ReadDir(name string) ([]fs.DirEntry, error) { + if err := f.checkName(name, "readdir"); err != nil { + return nil, err + } + return os.ReadDir(filepath.Join(string(f), name)) +} + +func (f DirFS) Stat(name string) (fs.FileInfo, error) { + if err := f.checkName(name, "stat"); err != nil { + return nil, err + } + return os.Stat(filepath.Join(string(f), name)) +} + +// checkName returns an error if name is not a valid path according to the docs of +// the io/fs package, with an extra cue taken from the standard lib's implementation +// of os.dirFS.Open(), which checks for invalid characters in Windows paths. 
+func (f DirFS) checkName(name, op string) error { + if !fs.ValidPath(name) || runtime.GOOS == "windows" && strings.ContainsAny(name, `\:`) { + return &fs.PathError{Op: op, Path: name, Err: fs.ErrInvalid} + } + return nil +} + +// FileFS allows accessing a file on disk using a consistent file system interface. +// The value should be the path to a regular file, not a directory. This file will +// be the only entry in the file system and will be at its root. It can be accessed +// within the file system by the name of "." or the filename. +type FileFS string + +func (f FileFS) Open(name string) (fs.File, error) { + if err := f.checkName(name, "open"); err != nil { + return nil, err + } + return os.Open(string(f)) +} + +func (f FileFS) ReadDir(name string) ([]fs.DirEntry, error) { + if err := f.checkName(name, "stat"); err != nil { + return nil, err + } + info, err := f.Stat(name) + if err != nil { + return nil, err + } + return []fs.DirEntry{fs.FileInfoToDirEntry(info)}, nil +} + +func (f FileFS) Stat(name string) (fs.FileInfo, error) { + if err := f.checkName(name, "stat"); err != nil { + return nil, err + } + return os.Stat(string(f)) +} + +func (f FileFS) checkName(name, op string) error { + if !fs.ValidPath(name) { + return &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + if name != "." && name != path.Base(string(f)) { + return &fs.PathError{Op: op, Path: name, Err: fs.ErrNotExist} + } + return nil +} + +// ArchiveFS allows accessing an archive (or a compressed archive) using a +// consistent file system interface. Essentially, it allows traversal and +// reading of archive contents the same way as any normal directory on disk. +// The contents of compressed archives are transparently decompressed. 
+type ArchiveFS struct { + Path string + Format Archival + Context context.Context // optional +} + +func (f ArchiveFS) Open(name string) (fs.File, error) { + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} + } + + archiveFile, err := os.Open(f.Path) + if err != nil { + return nil, err + } + defer func() { + // close the archive file if extraction failed; we can only + // count on the user/caller closing it if they successfully + // got the handle to the extracted file + if err != nil { + archiveFile.Close() + } + }() + + // handle special case of opening the archive root + if name == "." { + archiveInfo, err := archiveFile.Stat() + if err != nil { + return nil, err + } + entries, err := f.ReadDir(name) + if err != nil { + return nil, err + } + return &dirFile{ + extractedFile: extractedFile{ + File: File{ + FileInfo: dirFileInfo{archiveInfo}, + NameInArchive: ".", + }, + }, + entries: entries, + }, nil + } + + var fsFile fs.File + handler := func(_ context.Context, file File) error { + // if this is the requested file, and it's a directory, set up the dirFile, + // which will include a listing of all its contents as we continue the walk + trimmedName := strings.Trim(file.NameInArchive, "/") + if trimmedName == name && file.IsDir() { + fsFile = &dirFile{extractedFile: extractedFile{File: file}} + return nil + } + + // if the named file was a directory and we are filling its entries, + // add this entry to the list + if df, ok := fsFile.(*dirFile); ok { + df.entries = append(df.entries, fs.FileInfoToDirEntry(file)) + + // don't traverse into subfolders + if file.IsDir() { + return fs.SkipDir + } + + return nil + } + + // if named file is not a regular file, it can't be opened + if !file.Mode().IsRegular() { + fsFile = extractedFile{File: file} + return errStopWalk + } + + // regular files can be read, so open it for reading + rc, err := file.Open() + if err != nil { + return err + } + fsFile = extractedFile{File: 
file, ReadCloser: rc, parentArchive: archiveFile} + return errStopWalk + } + + err = f.Format.Extract(f.Context, archiveFile, []string{name}, handler) + if err != nil && fsFile != nil { + if ef, ok := fsFile.(extractedFile); ok { + if ef.parentArchive != nil { + // don't close the archive file in above defer; it + // will be closed when the returned file is closed + err = nil + } + } + } + if err != nil { + return nil, err + } + + return fsFile, nil +} + +func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) { + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrInvalid} + } + + if name == "." { + fileInfo, err := os.Stat(f.Path) + if err != nil { + return nil, err + } + return dirFileInfo{fileInfo}, nil + } + + archiveFile, err := os.Open(f.Path) + if err != nil { + return nil, err + } + defer archiveFile.Close() + + var result File + handler := func(_ context.Context, file File) error { + result = file + return errStopWalk + } + err = f.Format.Extract(f.Context, archiveFile, []string{name}, handler) + if err != nil && result.FileInfo == nil { + return nil, err + } + if result.FileInfo == nil { + return nil, fs.ErrNotExist + } + return result.FileInfo, err +} + +func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) { + if !fs.ValidPath(name) { + return nil, &fs.PathError{Op: "readdir", Path: name, Err: fs.ErrInvalid} + } + + archiveFile, err := os.Open(f.Path) + if err != nil { + return nil, err + } + defer archiveFile.Close() + + var entries []fs.DirEntry + handler := func(_ context.Context, file File) error { + // directories may end with trailing slash; standardize name + trimmedName := strings.Trim(file.NameInArchive, "/") + + // don't include the named directory itself in the list of entries + if trimmedName == name { + return nil + } + + entries = append(entries, fs.FileInfoToDirEntry(file)) + + // don't traverse into subfolders + if file.IsDir() { + return fs.SkipDir + } + + return nil + } + + // handle 
special case of reading from root of archive + var filter []string + if name != "." { + filter = []string{name} + } + + err = f.Format.Extract(f.Context, archiveFile, filter, handler) + return entries, err +} + +// errStopWalk is an arbitrary error value, since returning +// any error (other than fs.SkipDir) will stop a walk. We +// use this as we may only want 1 file from an extraction, +// even if that file is a directory and would otherwise be +// traversed during the walk. +var errStopWalk = fmt.Errorf("stop walk") + +// dirFile implements the fs.ReadDirFile interface. +type dirFile struct { + extractedFile + + // TODO: We could probably be more memory-efficient by not loading + // all the entries at once and then "faking" the paging for ReadDir(). + // Instead, we could maybe store a reference to the parent archive FS, + // then walk it each time ReadDir is called, skipping entriesRead + // files, then continuing the listing, until n are listed. But that + // might be kinda messy and a lot of work, so I leave it for a future + // optimization if needed. + entries []fs.DirEntry + entriesRead int +} + +// If this represents the root of the archive, we use the archive's +// FileInfo which says it's a file, not a directory; the whole point +// of this package is to treat the archive as a directory, so always +// return true in our case. +func (dirFile) IsDir() bool { return true } + +func (df *dirFile) ReadDir(n int) ([]fs.DirEntry, error) { + if n <= 0 { + return df.entries, nil + } + if df.entriesRead >= len(df.entries) { + return nil, io.EOF + } + if df.entriesRead+n > len(df.entries) { + n = len(df.entries) - df.entriesRead + } + entries := df.entries[df.entriesRead : df.entriesRead+n] + df.entriesRead += n + return entries, nil +} + +// dirFileInfo is an implementation of fs.FileInfo that +// is only used for files that are directories. It always +// returns 0 size, directory bit set in the mode, and +// true for IsDir. 
It is often used as the FileInfo for +// dirFile values. +type dirFileInfo struct { + fs.FileInfo +} + +func (dirFileInfo) Size() int64 { return 0 } +func (info dirFileInfo) Mode() fs.FileMode { return info.FileInfo.Mode() | fs.ModeDir } +func (dirFileInfo) IsDir() bool { return true } + +// extractedFile implements fs.File, thus it represents an "opened" file, +// which is slightly different from our File type which represents a file +// that possibly may be opened. If the file is actually opened, this type +// ensures that the parent archive is closed when this file from within it +// is also closed. +type extractedFile struct { + File + + // Set these fields if a "regular file" which has actual content + // that can be read, i.e. a file that is open for reading. + // ReadCloser should be the file's reader, and parentArchive is + // a reference to the archive the files comes out of. + // If parentArchive is set, it will also be closed along with + // the file when Close() is called. + io.ReadCloser + parentArchive io.Closer +} + +// Close closes the the current file if opened and +// the parent archive if specified. This is a no-op +// for directories which do not set those fields. 
+func (ef extractedFile) Close() error { + if ef.parentArchive != nil { + if err := ef.parentArchive.Close(); err != nil { + return err + } + } + if ef.ReadCloser != nil { + return ef.ReadCloser.Close() + } + return nil +} diff --git a/go.mod b/go.mod index 689d0e86..6911cebc 100644 --- a/go.mod +++ b/go.mod @@ -1,15 +1,19 @@ -module github.com/mholt/archiver/v3 +module github.com/mholt/archiver/v4 -go 1.13 +go 1.17 require ( github.com/andybalholm/brotli v1.0.4 - github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 - github.com/golang/snappy v0.0.2 - github.com/klauspost/compress v1.11.4 + github.com/dsnet/compress v0.0.1 + github.com/klauspost/compress v1.13.6 github.com/klauspost/pgzip v1.2.5 - github.com/nwaples/rardecode v1.1.0 - github.com/pierrec/lz4/v4 v4.1.2 - github.com/ulikunitz/xz v0.5.9 - github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 + github.com/nwaples/rardecode/v2 v2.0.0-beta.2 + github.com/therootcompany/xz v1.0.1 + github.com/ulikunitz/xz v0.5.10 +) + +require ( + github.com/golang/snappy v0.0.4 + github.com/pierrec/lz4/v4 v4.1.12 + golang.org/x/text v0.3.7 ) diff --git a/go.sum b/go.sum index c0899456..0177d9d7 100644 --- a/go.sum +++ b/go.sum @@ -1,24 +1,25 @@ github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY= -github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s= +github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= +github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= -github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= 
-github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU= -github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= -github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ= -github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= -github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM= -github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I= -github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= -github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
+github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= +github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= +github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= +github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw= +github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY= +github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= +github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= +github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/gz.go b/gz.go index 650718d0..ae6b4ef5 100644 --- a/gz.go +++ b/gz.go @@ -1,76 +1,79 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" "github.com/klauspost/compress/gzip" "github.com/klauspost/pgzip" ) +func init() { + RegisterFormat(Gz{}) +} + // Gz facilitates gzip compression. type Gz struct { + // Gzip compression level. See https://pkg.go.dev/compress/flate#pkg-constants + // for some predefined constants. If 0, DefaultCompression is assumed rather + // than no compression. CompressionLevel int - SingleThreaded bool + + // Use a fast parallel Gzip implementation. This is only + // effective for large streams (about 1 MB or greater). + Multithreaded bool } -// Compress reads in, compresses it, and writes it to out. 
-func (gz *Gz) Compress(in io.Reader, out io.Writer) error { - var w io.WriteCloser - var err error - if gz.SingleThreaded { - w, err = gzip.NewWriterLevel(out, gz.CompressionLevel) - } else { - w, err = pgzip.NewWriterLevel(out, gz.CompressionLevel) +func (Gz) Name() string { return ".gz" } + +func (gz Gz) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), gz.Name()) { + mr.ByName = true } - if err != nil { - return err + + // match file header + buf := make([]byte, len(gzHeader)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } - defer w.Close() - _, err = io.Copy(w, in) - return err + mr.ByStream = bytes.Equal(buf, gzHeader) + + return mr, nil } -// Decompress reads in, decompresses it, and writes it to out. -func (gz *Gz) Decompress(in io.Reader, out io.Writer) error { - var r io.ReadCloser - var err error - if gz.SingleThreaded { - r, err = gzip.NewReader(in) - } else { - r, err = pgzip.NewReader(in) - } - if err != nil { - return err +func (gz Gz) OpenWriter(w io.Writer) (io.WriteCloser, error) { + // assume default compression level if 0, rather than no + // compression, since no compression on a gzipped file + // doesn't make any sense in our use cases + level := gz.CompressionLevel + if level == 0 { + level = gzip.DefaultCompression } - defer r.Close() - _, err = io.Copy(out, r) - return err -} -// CheckExt ensures the file extension matches the format. -func (gz *Gz) CheckExt(filename string) error { - if filepath.Ext(filename) != ".gz" { - return fmt.Errorf("filename must have a .gz extension") + var wc io.WriteCloser + var err error + if gz.Multithreaded { + wc, err = pgzip.NewWriterLevel(w, level) + } else { + wc, err = gzip.NewWriterLevel(w, level) } - return nil + return wc, err } -func (gz *Gz) String() string { return "gz" } - -// NewGz returns a new, default instance ready to be customized and used. 
-func NewGz() *Gz { - return &Gz{ - CompressionLevel: gzip.DefaultCompression, +func (gz Gz) OpenReader(r io.Reader) (io.ReadCloser, error) { + var rc io.ReadCloser + var err error + if gz.Multithreaded { + rc, err = pgzip.NewReader(r) + } else { + rc, err = gzip.NewReader(r) } + return rc, err } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Gz)) - _ = Decompressor(new(Gz)) -) - -// DefaultGz is a default instance that is conveniently ready to use. -var DefaultGz = NewGz() +// magic number at the beginning of gzip files +var gzHeader = []byte{0x1f, 0x8b} diff --git a/interfaces.go b/interfaces.go new file mode 100644 index 00000000..0fc88930 --- /dev/null +++ b/interfaces.go @@ -0,0 +1,81 @@ +package archiver + +import ( + "context" + "io" +) + +// Format represents either an archive or compression format. +type Format interface { + // Name returns the name of the format. + Name() string + + // Match returns true if the given name/stream is recognized. + // One of the arguments is optional: filename might be empty + // if working with an unnamed stream, or stream might be + // empty if only working with a filename. The filename should + // consist only of the base name, not a path component, and is + // typically used for matching by file extension. However, + // matching by reading the stream is preferred. Match reads + // only as many bytes as needed to determine a match. To + // preserve the stream through matching, you should either + // buffer what is read by Match, or seek to the last position + // before Match was called. + Match(filename string, stream io.Reader) (MatchResult, error) +} + +// Compression is a compression format with both compress and decompress methods. +type Compression interface { + Format + Compressor + Decompressor +} + +// Archival is an archival format with both archive and extract methods. 
+type Archival interface { + Format + Archiver + Extractor +} + +// Compressor can compress data by wrapping a writer. +type Compressor interface { + // OpenWriter wraps w with a new writer that compresses what is written. + // The writer must be closed when writing is finished. + OpenWriter(w io.Writer) (io.WriteCloser, error) +} + +// Decompressor can decompress data by wrapping a reader. +type Decompressor interface { + // OpenReader wraps r with a new reader that decompresses what is read. + // The reader must be closed when reading is finished. + OpenReader(r io.Reader) (io.ReadCloser, error) +} + +// Archiver can create a new archive. +type Archiver interface { + // Archive writes an archive file to output with the given files. + // + // Context is optional, but if given, cancellation must be honored. + Archive(ctx context.Context, output io.Writer, files []File) error +} + +// Extractor can extract files from an archive. +type Extractor interface { + // Extract reads the files at pathsInArchive from sourceArchive. + // If pathsInArchive is nil, all files are extracted without discretion. + // If pathsInArchive is empty, no files are extracted. + // If a path refers to a directory, all files within it are extracted. + // Extracted files are passed to the handleFile callback for handling. + // + // Context is optional, but if given, cancellation must be honored. + Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error +} + +// Inserter can insert files into an existing archive. +type Inserter interface { + // Insert inserts the files into archive. + // + // Context is optional, but if given, cancellation must be honored. 
+ Insert(ctx context.Context, archive io.ReadWriteSeeker, files []File) error +} diff --git a/lz4.go b/lz4.go index 3d6b0a21..659c3975 100644 --- a/lz4.go +++ b/lz4.go @@ -1,63 +1,55 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" "github.com/pierrec/lz4/v4" ) +func init() { + RegisterFormat(Lz4{}) +} + // Lz4 facilitates LZ4 compression. type Lz4 struct { CompressionLevel int } -// Compress reads in, compresses it, and writes it to out. -func (lz *Lz4) Compress(in io.Reader, out io.Writer) error { - w := lz4.NewWriter(out) - // TODO archiver v4: use proper lz4.Fast - // bitshifting for backwards compatibility with lz4/v3 - options := []lz4.Option{ - lz4.CompressionLevelOption(lz4.CompressionLevel(1 << (8 + lz.CompressionLevel))), +func (Lz4) Name() string { return ".lz4" } + +func (lz Lz4) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), lz.Name()) { + mr.ByName = true } - if err := w.Apply(options...); err != nil { - return err + + // match file header + buf := make([]byte, len(lz4Header)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } - defer w.Close() - _, err := io.Copy(w, in) - return err -} + mr.ByStream = bytes.Equal(buf, lz4Header) -// Decompress reads in, decompresses it, and writes it to out. -func (lz *Lz4) Decompress(in io.Reader, out io.Writer) error { - r := lz4.NewReader(in) - _, err := io.Copy(out, r) - return err + return mr, nil } -// CheckExt ensures the file extension matches the format. 
-func (lz *Lz4) CheckExt(filename string) error { - if filepath.Ext(filename) != ".lz4" { - return fmt.Errorf("filename must have a .lz4 extension") +func (lz Lz4) OpenWriter(w io.Writer) (io.WriteCloser, error) { + lzw := lz4.NewWriter(w) + options := []lz4.Option{ + lz4.CompressionLevelOption(lz4.CompressionLevel(lz.CompressionLevel)), } - return nil -} - -func (lz *Lz4) String() string { return "lz4" } - -// NewLz4 returns a new, default instance ready to be customized and used. -func NewLz4() *Lz4 { - return &Lz4{ - CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48 + if err := lzw.Apply(options...); err != nil { + return nil, err } + return lzw, nil } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Lz4)) - _ = Decompressor(new(Lz4)) -) +func (Lz4) OpenReader(r io.Reader) (io.ReadCloser, error) { + return io.NopCloser(lz4.NewReader(r)), nil +} -// DefaultLz4 is a default instance that is conveniently ready to use. -var DefaultLz4 = NewLz4() +var lz4Header = []byte{0x04, 0x22, 0x4d, 0x18} diff --git a/rar.go b/rar.go index 35fd60b6..057c366d 100644 --- a/rar.go +++ b/rar.go @@ -2,423 +2,124 @@ package archiver import ( "bytes" + "context" + "errors" "fmt" "io" + "io/fs" "log" "os" "path" - "path/filepath" "strings" "time" - "github.com/nwaples/rardecode" + "github.com/nwaples/rardecode/v2" ) -// Rar provides facilities for reading RAR archives. -// See https://www.rarlab.com/technote.htm. -type Rar struct { - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. - OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a rar archive in the desired path. - MkdirAll bool - - // A single top-level folder can be implicitly - // created by the Unarchive method if the files - // to be extracted from the archive do not all - // have a common root. 
This roughly mimics the - // behavior of archival tools integrated into OS - // file browsers which create a subfolder to - // avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // Strip number of leading paths. This feature is available - // only during unpacking of the entire archive. - StripComponents int +func init() { + RegisterFormat(Rar{}) +} - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. +type Rar struct { + // If true, errors encountered during reading or writing + // a file within an archive will be logged and the + // operation will continue on remaining files. ContinueOnError bool - // The password to open archives (optional). + // Password to open archives. Password string - - rr *rardecode.Reader // underlying stream reader - rc *rardecode.ReadCloser // supports multi-volume archives (files only) } -// CheckExt ensures the file extension matches the format. 
-func (*Rar) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".rar") { - return fmt.Errorf("filename must have a .rar extension") - } - return nil -} +func (Rar) Name() string { return ".rar" } -// CheckPath ensures that the filename has not been crafted to perform path traversal attacks -func (*Rar) CheckPath(to, filename string) error { - to, _ = filepath.Abs(to) //explicit the destination folder to prevent that 'string.HasPrefix' check can be 'bypassed' when no destination folder is supplied in input - dest := filepath.Join(to, filename) - //prevent path traversal attacks - if !strings.HasPrefix(dest, to) { - return &IllegalPathError{AbsolutePath: dest, Filename: filename} - } - return nil -} +func (r Rar) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult -// Unarchive unpacks the .rar file at source to destination. -// Destination will be treated as a folder name. It supports -// multi-volume archives. -func (r *Rar) Unarchive(source, destination string) error { - if !fileExists(destination) && r.MkdirAll { - err := mkdir(destination, 0755) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } + // match filename + if strings.Contains(strings.ToLower(filename), r.Name()) { + mr.ByName = true } - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... 
- if r.ImplicitTopLevelFolder { - var err error - destination, err = r.addTopLevelFolder(source, destination) - if err != nil { - return fmt.Errorf("scanning source archive: %v", err) - } + // match file header (there are two versions; allocate buffer for larger one) + buf := make([]byte, len(rarHeaderV5_0)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } + mr.ByStream = bytes.Equal(buf[:len(rarHeaderV1_5)], rarHeaderV1_5) || bytes.Equal(buf, rarHeaderV5_0) - err := r.OpenFile(source) - if err != nil { - return fmt.Errorf("opening rar archive for reading: %v", err) - } - defer r.Close() - - for { - err := r.unrarNext(destination) - if err == io.EOF { - break - } - if err != nil { - if r.ContinueOnError || IsIllegalPathError(err) { - log.Printf("[ERROR] Reading file in rar archive: %v", err) - continue - } - return fmt.Errorf("reading file in rar archive: %v", err) - } - } - - return nil + return mr, nil } -// addTopLevelFolder scans the files contained inside -// the tarball named sourceArchive and returns a modified -// destination if all the files do not share the same -// top-level folder. 
-func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, error) { - file, err := os.Open(sourceArchive) - if err != nil { - return "", fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - rc, err := rardecode.NewReader(file, r.Password) - if err != nil { - return "", fmt.Errorf("creating archive reader: %v", err) - } - - var files []string - for { - hdr, err := rc.Next() - if err == io.EOF { - break - } - if err != nil { - return "", fmt.Errorf("scanning tarball's file listing: %v", err) - } - files = append(files, hdr.Name) - } - - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) - } - - return destination, nil -} - -func (r *Rar) unrarNext(to string) error { - f, err := r.Read() - if err != nil { - return err // don't wrap error; calling loop must break on io.EOF - } - defer f.Close() - - header, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) - } - - errPath := r.CheckPath(to, header.Name) - if errPath != nil { - return fmt.Errorf("checking path traversal attempt: %v", errPath) - } - - if r.StripComponents > 0 { - if strings.Count(header.Name, "/") < r.StripComponents { - return nil // skip path with fewer components - } - - for i := 0; i < r.StripComponents; i++ { - slash := strings.Index(header.Name, "/") - header.Name = header.Name[slash+1:] - } - } - - return r.unrarFile(f, filepath.Join(to, header.Name)) +// Archive is not implemented for RAR, but the method exists so that Rar satisfies the ArchiveFormat interface. 
+func (r Rar) Archive(_ context.Context, _ io.Writer, _ []File) error { + return fmt.Errorf("not implemented because RAR is a proprietary format") } -func (r *Rar) unrarFile(f File, to string) error { - // do not overwrite existing files, if configured - if !f.IsDir() && !r.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", to) +func (r Rar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { + if ctx == nil { + ctx = context.Background() } - hdr, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + var options []rardecode.Option + if r.Password != "" { + options = append(options, rardecode.Password(r.Password)) } - if f.IsDir() { - if fileExists("testdata") { - err := os.Chmod(to, hdr.Mode()) - if err != nil { - return fmt.Errorf("changing dir mode: %v", err) - } - } else { - err := mkdir(to, hdr.Mode()) - if err != nil { - return fmt.Errorf("making directories: %v", err) - } - } - return nil - } - - // if files come before their containing folders, then we must - // create their folders before writing the file - err := mkdir(filepath.Dir(to), 0755) - if err != nil { - return fmt.Errorf("making parent directories: %v", err) - } - - if (hdr.Mode() & os.ModeSymlink) != 0 { - return nil - } - - return writeNewFile(to, r.rr, hdr.Mode()) -} - -// OpenFile opens filename for reading. This method supports -// multi-volume archives, whereas Open does not (but Open -// supports any stream, not just files). -func (r *Rar) OpenFile(filename string) error { - if r.rr != nil { - return fmt.Errorf("rar archive is already open for reading") - } - var err error - r.rc, err = rardecode.OpenReader(filename, r.Password) + rr, err := rardecode.NewReader(sourceArchive, options...) if err != nil { return err } - r.rr = &r.rc.Reader - return nil -} - -// Open opens t for reading an archive from -// in. 
The size parameter is not used. -func (r *Rar) Open(in io.Reader, size int64) error { - if r.rr != nil { - return fmt.Errorf("rar archive is already open for reading") - } - var err error - r.rr, err = rardecode.NewReader(in, r.Password) - return err -} - -// Read reads the next file from t, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. -func (r *Rar) Read() (File, error) { - if r.rr == nil { - return File{}, fmt.Errorf("rar archive is not open") - } - hdr, err := r.rr.Next() - if err != nil { - return File{}, err // don't wrap error; preserve io.EOF - } - - file := File{ - FileInfo: rarFileInfo{hdr}, - Header: hdr, - ReadCloser: ReadFakeCloser{r.rr}, - } - - return file, nil -} - -// Close closes the rar archive(s) opened by Create and Open. -func (r *Rar) Close() error { - var err error - if r.rc != nil { - rc := r.rc - r.rc = nil - err = rc.Close() - } - if r.rr != nil { - r.rr = nil - } - return err -} - -// Walk calls walkFn for each visited item in archive. 
-func (r *Rar) Walk(archive string, walkFn WalkFunc) error { - file, err := os.Open(archive) - if err != nil { - return fmt.Errorf("opening archive file: %v", err) - } - defer file.Close() - - err = r.Open(file, 0) - if err != nil { - return fmt.Errorf("opening archive: %v", err) - } - defer r.Close() + // important to initialize to non-nil, empty value due to how fileIsIncluded works + skipDirs := skipList{} for { - f, err := r.Read() + if err := ctx.Err(); err != nil { + return err // honor context cancellation + } + + hdr, err := rr.Next() if err == io.EOF { break } if err != nil { if r.ContinueOnError { - log.Printf("[ERROR] Opening next file: %v", err) + log.Printf("[ERROR] Advancing to next file in rar archive: %v", err) continue } - return fmt.Errorf("opening next file: %v", err) + return err } - err = walkFn(f) - if err != nil { - if err == ErrStopWalk { - break - } - if r.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", f.Name(), err) - continue - } - return fmt.Errorf("walking %s: %v", f.Name(), err) + if !fileIsIncluded(pathsInArchive, hdr.Name) { + continue } - } - - return nil -} - -// Extract extracts a single file from the rar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (r *Rar) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return r.Walk(source, func(f File) error { - th, ok := f.Header.(*rardecode.FileHeader) - if !ok { - return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header) + if fileIsIncluded(skipDirs, hdr.Name) { + continue } - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(th.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) + file := File{ + FileInfo: rarFileInfo{hdr}, + Header: hdr, + NameInArchive: hdr.Name, + Open: func() (io.ReadCloser, error) { return io.NopCloser(rr), nil }, } - if within(target, th.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, th.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) - } - joined := filepath.Join(destination, end) - - err = r.unrarFile(f, joined) - if err != nil { - return fmt.Errorf("extracting file %s: %v", th.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk + err = handleFile(ctx, file) + if errors.Is(err, fs.SkipDir) { + // if a directory, skip this path; if a file, skip the folder path + dirPath := hdr.Name + if !hdr.IsDir { + dirPath = path.Dir(hdr.Name) } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk + skipDirs.add(dirPath) + } else if err != nil { + return fmt.Errorf("handling file: %s: %w", hdr.Name, err) } - - return nil - }) -} - -// Match returns true if the 
format of file matches this -// type's format. It should not affect reader position. -func (*Rar) Match(file io.ReadSeeker) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) - if err != nil { - return false, err - } - _, err = file.Seek(0, 0) - if err != nil { - return false, err - } - defer func() { - _, _ = file.Seek(currentPos, io.SeekStart) - }() - - buf := make([]byte, 8) - if n, err := file.Read(buf); err != nil || n < 8 { - return false, nil } - hasRarHeader := bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5 - bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0 - return hasRarHeader, nil -} -func (r *Rar) String() string { return "rar" } - -// NewRar returns a new, default instance ready to be customized and used. -func NewRar() *Rar { - return &Rar{ - MkdirAll: true, - } + return nil } +// rarFileInfo satisfies the fs.FileInfo interface for RAR entries. type rarFileInfo struct { fh *rardecode.FileHeader } @@ -430,17 +131,7 @@ func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime } func (rfi rarFileInfo) IsDir() bool { return rfi.fh.IsDir } func (rfi rarFileInfo) Sys() interface{} { return nil } -// Compile-time checks to ensure type implements desired interfaces. var ( - _ = Reader(new(Rar)) - _ = Unarchiver(new(Rar)) - _ = Walker(new(Rar)) - _ = Extractor(new(Rar)) - _ = Matcher(new(Rar)) - _ = ExtensionChecker(new(Rar)) - _ = FilenameChecker(new(Rar)) - _ = os.FileInfo(rarFileInfo{}) + rarHeaderV1_5 = []byte("Rar!\x1a\x07\x00") // v1.5 + rarHeaderV5_0 = []byte("Rar!\x1a\x07\x01\x00") // v5.0 ) - -// DefaultRar is a default instance that is conveniently ready to use. -var DefaultRar = NewRar() diff --git a/sz.go b/sz.go index 02009b52..577e331a 100644 --- a/sz.go +++ b/sz.go @@ -1,51 +1,47 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" "github.com/golang/snappy" ) -// Snappy facilitates Snappy compression. 
-type Snappy struct{} - -// Compress reads in, compresses it, and writes it to out. -func (s *Snappy) Compress(in io.Reader, out io.Writer) error { - w := snappy.NewBufferedWriter(out) - defer w.Close() - _, err := io.Copy(w, in) - return err +func init() { + RegisterFormat(Sz{}) } -// Decompress reads in, decompresses it, and writes it to out. -func (s *Snappy) Decompress(in io.Reader, out io.Writer) error { - r := snappy.NewReader(in) - _, err := io.Copy(out, r) - return err -} +// Sz facilitates Snappy compression. +type Sz struct{} + +func (sz Sz) Name() string { return ".sz" } -// CheckExt ensures the file extension matches the format. -func (s *Snappy) CheckExt(filename string) error { - if filepath.Ext(filename) != ".sz" { - return fmt.Errorf("filename must have a .sz extension") +func (sz Sz) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), sz.Name()) { + mr.ByName = true } - return nil -} -func (s *Snappy) String() string { return "sz" } + // match file header + buf := make([]byte, len(snappyHeader)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err + } + mr.ByStream = bytes.Equal(buf, snappyHeader) -// NewSnappy returns a new, default instance ready to be customized and used. -func NewSnappy() *Snappy { - return new(Snappy) + return mr, nil } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Snappy)) - _ = Decompressor(new(Snappy)) -) +func (Sz) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return snappy.NewBufferedWriter(w), nil +} + +func (Sz) OpenReader(r io.Reader) (io.ReadCloser, error) { + return io.NopCloser(snappy.NewReader(r)), nil +} -// DefaultSnappy is a default instance that is conveniently ready to use. 
-var DefaultSnappy = NewSnappy() +// https://github.com/google/snappy/blob/master/framing_format.txt +var snappyHeader = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59} diff --git a/tar.go b/tar.go index be898665..dd146c28 100644 --- a/tar.go +++ b/tar.go @@ -2,658 +2,235 @@ package archiver import ( "archive/tar" - "bytes" + "context" + "errors" "fmt" "io" + "io/fs" "log" - "os" "path" - "path/filepath" - "strconv" "strings" ) -// Tar provides facilities for operating TAR archives. -// See http://www.gnu.org/software/tar/manual/html_node/Standard.html. +func init() { + RegisterFormat(Tar{}) +} + type Tar struct { - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. - OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a tar archive in the desired path. - MkdirAll bool - - // A single top-level folder can be implicitly - // created by the Archive or Unarchive methods - // if the files to be added to the archive - // or the files to be extracted from the archive - // do not all have a common root. This roughly - // mimics the behavior of archival tools integrated - // into OS file browsers which create a subfolder - // to avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // Strip number of leading paths. This feature is available - // only during unpacking of the entire archive. - StripComponents int - - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. 
+ // If true, errors encountered during reading or writing + // a file within an archive will be logged and the + // operation will continue on remaining files. ContinueOnError bool - - tw *tar.Writer - tr *tar.Reader - - readerWrapFn func(io.Reader) (io.Reader, error) - writerWrapFn func(io.Writer) (io.Writer, error) - cleanupWrapFn func() } -// CheckExt ensures the file extension matches the format. -func (*Tar) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar") { - return fmt.Errorf("filename must have a .tar extension") - } - return nil -} +func (Tar) Name() string { return ".tar" } -// CheckPath ensures that the filename has not been crafted to perform path traversal attacks -func (*Tar) CheckPath(to, filename string) error { - to, _ = filepath.Abs(to) //explicit the destination folder to prevent that 'string.HasPrefix' check can be 'bypassed' when no destination folder is supplied in input - dest := filepath.Join(to, filename) - //prevent path traversal attacks - if !strings.HasPrefix(dest, to) { - return &IllegalPathError{AbsolutePath: dest, Filename: filename} - } - return nil -} +func (t Tar) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult -// Archive creates a tarball file at destination containing -// the files listed in sources. The destination must end with -// ".tar". File paths can be those of regular files or -// directories; directories will be recursively added. 
-func (t *Tar) Archive(sources []string, destination string) error { - err := t.CheckExt(destination) - if t.writerWrapFn == nil && err != nil { - return fmt.Errorf("checking extension: %v", err) - } - if !t.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file already exists: %s", destination) + // match filename + if strings.Contains(strings.ToLower(filename), t.Name()) { + mr.ByName = true } - // make the folder to contain the resulting archive - // if it does not already exist - destDir := filepath.Dir(destination) - if t.MkdirAll && !fileExists(destDir) { - err := mkdir(destDir, 0755) - if err != nil { - return fmt.Errorf("making folder for destination: %v", err) - } - } + // match file header + r := tar.NewReader(stream) + _, err := r.Next() + mr.ByStream = err == nil - out, err := os.Create(destination) - if err != nil { - return fmt.Errorf("creating %s: %v", destination, err) - } - defer out.Close() + return mr, nil +} - err = t.Create(out) - if err != nil { - return fmt.Errorf("creating tar: %v", err) +func (t Tar) Archive(ctx context.Context, output io.Writer, files []File) error { + if ctx == nil { + ctx = context.Background() } - defer t.Close() - var topLevelFolder string - if t.ImplicitTopLevelFolder && multipleTopLevels(sources) { - topLevelFolder = folderNameFromFileName(destination) - } + tw := tar.NewWriter(output) + defer tw.Close() - for _, source := range sources { - err := t.writeWalk(source, topLevelFolder, destination) + for _, file := range files { + if err := ctx.Err(); err != nil { + return err // honor context cancellation + } + err := t.writeFileToArchive(ctx, tw, file) if err != nil { - return fmt.Errorf("walking %s: %v", source, err) + if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort + log.Printf("[ERROR] %v", err) + continue + } + return err } } return nil } -// Unarchive unpacks the .tar file at source to destination. -// Destination will be treated as a folder name. 
-func (t *Tar) Unarchive(source, destination string) error { - if !fileExists(destination) && t.MkdirAll { - err := mkdir(destination, 0755) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } +func (Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error { + hdr, err := tar.FileInfoHeader(file, file.LinkTarget) + if err != nil { + return fmt.Errorf("file %s: creating header: %w", file.NameInArchive, err) } - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... - if t.ImplicitTopLevelFolder { - var err error - destination, err = t.addTopLevelFolder(source, destination) - if err != nil { - return fmt.Errorf("scanning source archive: %v", err) - } - } + // reset the name; FileInfoHeader() only puts the + // base name of the file, not the whole path + hdr.Name = file.NameInArchive - file, err := os.Open(source) - if err != nil { - return fmt.Errorf("opening source archive: %v", err) + if err := tw.WriteHeader(hdr); err != nil { + return fmt.Errorf("file %s: writing header: %w", file.NameInArchive, err) } - defer file.Close() - err = t.Open(file, 0) - if err != nil { - return fmt.Errorf("opening tar archive for reading: %v", err) + // only proceed to write a file body if there is actually a body + // (for example, directories and links don't have a body) + if hdr.Typeflag != tar.TypeReg { + return nil } - defer t.Close() - for { - err := t.untarNext(destination) - if err == io.EOF { - break - } - if err != nil { - if t.ContinueOnError || IsIllegalPathError(err) { - log.Printf("[ERROR] Reading file in tar archive: %v", err) - continue - } - return fmt.Errorf("reading file in tar archive: %v", err) - } + if err := openAndCopyFile(file, tw); err != nil { + return fmt.Errorf("file %s: writing data: %w", file.NameInArchive, err) } return nil } -// addTopLevelFolder scans the files contained inside -// the 
tarball named sourceArchive and returns a modified -// destination if all the files do not share the same -// top-level folder. -func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) { - file, err := os.Open(sourceArchive) - if err != nil { - return "", fmt.Errorf("opening source archive: %v", err) - } - defer file.Close() - - // if the reader is to be wrapped, ensure we do that now - // or we will not be able to read the archive successfully - reader := io.Reader(file) - if t.readerWrapFn != nil { - reader, err = t.readerWrapFn(reader) - if err != nil { - return "", fmt.Errorf("wrapping reader: %v", err) - } - } - if t.cleanupWrapFn != nil { - defer t.cleanupWrapFn() - } - - tr := tar.NewReader(reader) - - var files []string +func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) error { + if ctx == nil { + ctx = context.Background() + } + + // Tar files may end with some, none, or a lot of zero-byte padding. The spec says + // it should end with two 512-byte trailer records consisting solely of null/0 + // bytes: https://www.gnu.org/software/tar/manual/html_node/Standard.html. However, + // in my experiments using the `tar` command, I've found that is not the case, + // and Colin Percival (author of tarsnap) confirmed this: + // - https://twitter.com/cperciva/status/1476774314623913987 + // - https://twitter.com/cperciva/status/1476776999758663680 + // So while this solution on Stack Overflow makes sense if you control the + // writer: https://stackoverflow.com/a/18330903/1048862 - and I did get it + // to work in that case -- it is not a general solution. Seems that the only + // reliable thing to do is scan the entire archive to find the last file, + // read its size, then use that to compute the end of content and thus the + // true length of end-of-archive padding. 
This is slightly more complex than + // just adding the size of the last file to the current stream/seek position, + // because we have to align to 512-byte blocks precisely. I don't actually + // fully know why this works, but in my testing on a few different files it + // did work, whereas other solutions only worked on 1 specific file. *shrug* + // + // Another option is to scan the file for the last contiguous series of 0s, + // without interpreting the tar format at all, and to find the nearest + // blocksize-offset and start writing there. Problem is that you wouldn't + // know if you just overwrote some of the last file if it ends with all 0s. + // Sigh. + var lastFileSize, lastStreamPos int64 + tr := tar.NewReader(into) for { hdr, err := tr.Next() if err == io.EOF { break } if err != nil { - return "", fmt.Errorf("scanning tarball's file listing: %v", err) - } - files = append(files, hdr.Name) - } - - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(sourceArchive)) - } - - return destination, nil -} - -func (t *Tar) untarNext(destination string) error { - f, err := t.Read() - if err != nil { - return err // don't wrap error; calling loop must break on io.EOF - } - defer f.Close() - - header, ok := f.Header.(*tar.Header) - if !ok { - return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) - } - - errPath := t.CheckPath(destination, header.Name) - if errPath != nil { - return fmt.Errorf("checking path traversal attempt: %v", errPath) - } - - if t.StripComponents > 0 { - if strings.Count(header.Name, "/") < t.StripComponents { - return nil // skip path with fewer components - } - - for i := 0; i < t.StripComponents; i++ { - slash := strings.Index(header.Name, "/") - header.Name = header.Name[slash+1:] - } - } - return t.untarFile(f, destination, header) -} - -func (t *Tar) untarFile(f File, destination string, hdr *tar.Header) error { - to := filepath.Join(destination, hdr.Name) - - // do not 
overwrite existing files, if configured - if !f.IsDir() && !t.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", to) - } - - switch hdr.Typeflag { - case tar.TypeDir: - return mkdir(to, f.Mode()) - case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse: - return writeNewFile(to, f, f.Mode()) - case tar.TypeSymlink: - return writeNewSymbolicLink(to, hdr.Linkname) - case tar.TypeLink: - return writeNewHardLink(to, filepath.Join(destination, hdr.Linkname)) - case tar.TypeXGlobalHeader: - return nil // ignore the pax global header from git-generated tarballs - default: - return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag) - } -} - -func (t *Tar) writeWalk(source, topLevelFolder, destination string) error { - sourceInfo, err := os.Stat(source) - if err != nil { - return fmt.Errorf("%s: stat: %v", source, err) - } - destAbs, err := filepath.Abs(destination) - if err != nil { - return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) - } - - return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { - handleErr := func(err error) error { - if t.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", fpath, err) - return nil - } return err } + lastStreamPos, err = into.Seek(0, io.SeekCurrent) if err != nil { - return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) - } - if info == nil { - return handleErr(fmt.Errorf("no file info")) - } - - // make sure we do not copy our output file into itself - fpathAbs, err := filepath.Abs(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) - } - if within(fpathAbs, destAbs) { - return nil - } - - // build the name to be used within the archive - nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath) - if err != nil { - return handleErr(err) - } - - var file io.ReadCloser - if info.Mode().IsRegular() 
{ - file, err = os.Open(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) - } - defer file.Close() - } - err = t.Write(File{ - FileInfo: FileInfo{ - FileInfo: info, - CustomName: nameInArchive, - SourcePath: fpath, - }, - ReadCloser: file, - }) - if err != nil { - return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) - } - - return nil - }) -} - -// Create opens t for writing a tar archive to out. -func (t *Tar) Create(out io.Writer) error { - if t.tw != nil { - return fmt.Errorf("tar archive is already created for writing") - } - - // wrapping writers allows us to output - // compressed tarballs, for example - if t.writerWrapFn != nil { - var err error - out, err = t.writerWrapFn(out) - if err != nil { - return fmt.Errorf("wrapping writer: %v", err) - } - } - - t.tw = tar.NewWriter(out) - return nil -} - -// Write writes f to t, which must have been opened for writing first. -func (t *Tar) Write(f File) error { - if t.tw == nil { - return fmt.Errorf("tar archive was not created for writing first") - } - if f.FileInfo == nil { - return fmt.Errorf("no file info") - } - if f.FileInfo.Name() == "" { - return fmt.Errorf("missing file name") - } - - var linkTarget string - if isSymlink(f) { - fi, ok := f.FileInfo.(FileInfo) - if !ok { - return fmt.Errorf("failed to cast fs.FileInfo to archiver.FileInfo: %v", f) - } - var err error - linkTarget, err = os.Readlink(fi.SourcePath) - if err != nil { - return fmt.Errorf("%s: readlink: %v", fi.SourcePath, err) + return err } + lastFileSize = hdr.Size } - hdr, err := tar.FileInfoHeader(f, filepath.ToSlash(linkTarget)) + // we can now compute the precise location to write the new file to (I think) + const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package) + newOffset := lastStreamPos + lastFileSize + newOffset += blockSize - (newOffset % blockSize) // shift to next-nearest block boundary + _, err := into.Seek(newOffset, io.SeekStart) if err != 
nil { - return fmt.Errorf("%s: making header: %v", f.Name(), err) + return err } - err = t.tw.WriteHeader(hdr) - if err != nil { - return fmt.Errorf("%s: writing header: %w", hdr.Name, err) - } - - if f.IsDir() { - return nil // directories have no contents - } + tw := tar.NewWriter(into) + defer tw.Close() - if hdr.Typeflag == tar.TypeReg { - if f.ReadCloser == nil { - return fmt.Errorf("%s: no way to read file contents", f.Name()) + for i, file := range files { + if err := ctx.Err(); err != nil { + return err // honor context cancellation } - _, err := io.Copy(t.tw, f) + err = t.writeFileToArchive(ctx, tw, file) if err != nil { - return fmt.Errorf("%s: copying contents: %w", f.Name(), err) + if t.ContinueOnError && ctx.Err() == nil { + log.Printf("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), err) + continue + } + return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), err) } } return nil } -// Open opens t for reading an archive from -// in. The size parameter is not used. -func (t *Tar) Open(in io.Reader, size int64) error { - if t.tr != nil { - return fmt.Errorf("tar archive is already open for reading") - } - // wrapping readers allows us to open compressed tarballs - if t.readerWrapFn != nil { - var err error - in, err = t.readerWrapFn(in) - if err != nil { - return fmt.Errorf("wrapping file reader: %v", err) - } - } - t.tr = tar.NewReader(in) - return nil -} - -// Read reads the next file from t, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. 
-func (t *Tar) Read() (File, error) { - if t.tr == nil { - return File{}, fmt.Errorf("tar archive is not open") - } - - hdr, err := t.tr.Next() - if err != nil { - return File{}, err // don't wrap error; preserve io.EOF - } - - file := File{ - FileInfo: hdr.FileInfo(), - Header: hdr, - ReadCloser: ReadFakeCloser{t.tr}, +func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { + if ctx == nil { + ctx = context.Background() } - return file, nil -} - -// Close closes the tar archive(s) opened by Create and Open. -func (t *Tar) Close() error { - var err error - if t.tr != nil { - t.tr = nil - } - if t.tw != nil { - tw := t.tw - t.tw = nil - err = tw.Close() - } - // make sure cleanup of "Reader/Writer wrapper" - // (say that ten times fast) happens AFTER the - // underlying stream is closed - if t.cleanupWrapFn != nil { - t.cleanupWrapFn() - } - return err -} + tr := tar.NewReader(sourceArchive) -// Walk calls walkFn for each visited item in archive. 
-func (t *Tar) Walk(archive string, walkFn WalkFunc) error { - file, err := os.Open(archive) - if err != nil { - return fmt.Errorf("opening archive file: %v", err) - } - defer file.Close() - - err = t.Open(file, 0) - if err != nil { - return fmt.Errorf("opening archive: %v", err) - } - defer t.Close() + // important to initialize to non-nil, empty value due to how fileIsIncluded works + skipDirs := skipList{} for { - f, err := t.Read() + if err := ctx.Err(); err != nil { + return err // honor context cancellation + } + + hdr, err := tr.Next() if err == io.EOF { break } if err != nil { - if t.ContinueOnError { - log.Printf("[ERROR] Opening next file: %v", err) + if t.ContinueOnError && ctx.Err() == nil { + log.Printf("[ERROR] Advancing to next file in tar archive: %v", err) continue } - return fmt.Errorf("opening next file: %v", err) + return err } - err = walkFn(f) - if err != nil { - if err == ErrStopWalk { - break - } - if t.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", f.Name(), err) - continue - } - return fmt.Errorf("walking %s: %v", f.Name(), err) + if !fileIsIncluded(pathsInArchive, hdr.Name) { + continue } - } - - return nil -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (t *Tar) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return t.Walk(source, func(f File) error { - th, ok := f.Header.(*tar.Header) - if !ok { - return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header) + if fileIsIncluded(skipDirs, hdr.Name) { + continue } - - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(th.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) + if hdr.Typeflag == tar.TypeXGlobalHeader { + // ignore the pax global header from git-generated tarballs + continue } - if within(target, th.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, th.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) - } - th.Name = end - - // relativize any hardlink names - if th.Typeflag == tar.TypeLink { - th.Linkname = filepath.Join(filepath.Base(filepath.Dir(th.Linkname)), filepath.Base(th.Linkname)) - } - - err = t.untarFile(f, destination, th) - if err != nil { - return fmt.Errorf("extracting file %s: %v", th.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk - } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk + file := File{ + FileInfo: hdr.FileInfo(), + Header: hdr, + NameInArchive: hdr.Name, + LinkTarget: hdr.Linkname, + Open: func() (io.ReadCloser, error) { return io.NopCloser(tr), nil }, } - return nil - }) -} - -// Match returns true if the format of file matches this -// 
type's format. It should not affect reader position. -func (*Tar) Match(file io.ReadSeeker) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) - if err != nil { - return false, err - } - _, err = file.Seek(0, 0) - if err != nil { - return false, err - } - defer func() { - _, _ = file.Seek(currentPos, io.SeekStart) - }() - - buf := make([]byte, tarBlockSize) - if _, err = io.ReadFull(file, buf); err != nil { - return false, nil - } - return hasTarHeader(buf), nil -} - -// hasTarHeader checks passed bytes has a valid tar header or not. buf must -// contain at least 512 bytes and if not, it always returns false. -func hasTarHeader(buf []byte) bool { - if len(buf) < tarBlockSize { - return false - } - - b := buf[148:156] - b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes - if len(b) == 0 { - return false // unknown format - } - hdrSum, err := strconv.ParseUint(string(b), 8, 64) - if err != nil { - return false - } - - // According to the go official archive/tar, Sun tar uses signed byte - // values so this calcs both signed and unsigned - var usum uint64 - var sum int64 - for i, c := range buf { - if 148 <= i && i < 156 { - c = ' ' // checksum field itself is counted as branks + err = handleFile(ctx, file) + if errors.Is(err, fs.SkipDir) { + // if a directory, skip this path; if a file, skip the folder path + dirPath := hdr.Name + if hdr.Typeflag != tar.TypeDir { + dirPath = path.Dir(hdr.Name) + } + skipDirs.add(dirPath) + } else if err != nil { + return fmt.Errorf("handling file: %s: %w", hdr.Name, err) } - usum += uint64(uint8(c)) - sum += int64(int8(c)) } - if hdrSum != usum && int64(hdrSum) != sum { - return false // invalid checksum - } - - return true -} - -func (t *Tar) String() string { return "tar" } - -// NewTar returns a new, default instance ready to be customized and used. 
-func NewTar() *Tar { - return &Tar{ - MkdirAll: true, - } + return nil } -const tarBlockSize = 512 - -// Compile-time checks to ensure type implements desired interfaces. +// Interface guards var ( - _ = Reader(new(Tar)) - _ = Writer(new(Tar)) - _ = Archiver(new(Tar)) - _ = Unarchiver(new(Tar)) - _ = Walker(new(Tar)) - _ = Extractor(new(Tar)) - _ = Matcher(new(Tar)) - _ = ExtensionChecker(new(Tar)) - _ = FilenameChecker(new(Tar)) + _ Archiver = (*Tar)(nil) + _ Extractor = (*Tar)(nil) + _ Inserter = (*Tar)(nil) ) - -// DefaultTar is a default instance that is conveniently ready to use. -var DefaultTar = NewTar() diff --git a/tar_test.go b/tar_test.go deleted file mode 100644 index 7a9d3541..00000000 --- a/tar_test.go +++ /dev/null @@ -1,67 +0,0 @@ -package archiver_test - -import ( - "io/ioutil" - "os" - "path" - "testing" - - "github.com/mholt/archiver/v3" -) - -func requireRegularFile(t *testing.T, path string) os.FileInfo { - fileInfo, err := os.Stat(path) - if err != nil { - t.Fatalf("fileInfo on '%s': %v", path, err) - } - - if !fileInfo.Mode().IsRegular() { - t.Fatalf("'%s' expected to be a regular file", path) - } - - return fileInfo -} - -func assertSameFile(t *testing.T, f1, f2 os.FileInfo) { - if !os.SameFile(f1, f2) { - t.Errorf("expected '%s' and '%s' to be the same file", f1.Name(), f2.Name()) - } -} - -func TestDefaultTar_Unarchive_HardlinkSuccess(t *testing.T) { - source := "testdata/gnu-hardlinks.tar" - - destination, err := ioutil.TempDir("", "archiver_tar_test") - if err != nil { - t.Fatalf("creating temp dir: %v", err) - } - defer os.RemoveAll(destination) - - err = archiver.DefaultTar.Unarchive(source, destination) - if err != nil { - t.Fatalf("unarchiving '%s' to '%s': %v", source, destination, err) - } - - fileaInfo := requireRegularFile(t, path.Join(destination, "dir-1", "dir-2", "file-a")) - filebInfo := requireRegularFile(t, path.Join(destination, "dir-1", "dir-2", "file-b")) - assertSameFile(t, fileaInfo, filebInfo) -} - -func 
TestDefaultTar_Extract_HardlinkSuccess(t *testing.T) { - source := "testdata/gnu-hardlinks.tar" - - destination, err := ioutil.TempDir("", "archiver_tar_test") - if err != nil { - t.Fatalf("creating temp dir: %v", err) - } - defer os.RemoveAll(destination) - - err = archiver.DefaultTar.Extract(source, path.Join("dir-1", "dir-2"), destination) - if err != nil { - t.Fatalf("unarchiving '%s' to '%s': %v", source, destination, err) - } - - fileaInfo := requireRegularFile(t, path.Join(destination, "dir-2", "file-a")) - filebInfo := requireRegularFile(t, path.Join(destination, "dir-2", "file-b")) - assertSameFile(t, fileaInfo, filebInfo) -} diff --git a/tarbrotli.go b/tarbrotli.go deleted file mode 100644 index 83a455d6..00000000 --- a/tarbrotli.go +++ /dev/null @@ -1,114 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/andybalholm/brotli" -) - -// TarBrotli facilitates brotli compression of tarball archives. -type TarBrotli struct { - *Tar - Quality int -} - -// CheckExt ensures the file extension matches the format. -func (*TarBrotli) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.br") && - !strings.HasSuffix(filename, ".tbr") { - return fmt.Errorf("filename must have a .tar.br or .tbr extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.br" or ".tbr". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tbr *TarBrotli) Archive(sources []string, destination string) error { - err := tbr.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tbr.wrapWriter() - return tbr.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. 
-func (tbr *TarBrotli) Unarchive(source, destination string) error { - tbr.wrapReader() - return tbr.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tbr *TarBrotli) Walk(archive string, walkFn WalkFunc) error { - tbr.wrapReader() - return tbr.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. -func (tbr *TarBrotli) Create(out io.Writer) error { - tbr.wrapWriter() - return tbr.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tbr *TarBrotli) Open(in io.Reader, size int64) error { - tbr.wrapReader() - return tbr.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tbr *TarBrotli) Extract(source, target, destination string) error { - tbr.wrapReader() - return tbr.Tar.Extract(source, target, destination) -} - -func (tbr *TarBrotli) wrapWriter() { - var brw *brotli.Writer - tbr.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - brw = brotli.NewWriterLevel(w, tbr.Quality) - return brw, nil - } - tbr.Tar.cleanupWrapFn = func() { - brw.Close() - } -} - -func (tbr *TarBrotli) wrapReader() { - tbr.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - return brotli.NewReader(r), nil - } -} - -func (tbr *TarBrotli) String() string { return "tar.br" } - -// NewTarBrotli returns a new, default instance ready to be customized and used. -func NewTarBrotli() *TarBrotli { - return &TarBrotli{ - Tar: NewTar(), - Quality: brotli.DefaultCompression, - } -} - -// Compile-time checks to ensure type implements desired interfaces. 
-var ( - _ = Reader(new(TarBrotli)) - _ = Writer(new(TarBrotli)) - _ = Archiver(new(TarBrotli)) - _ = Unarchiver(new(TarBrotli)) - _ = Walker(new(TarBrotli)) - _ = Extractor(new(TarBrotli)) -) - -// DefaultTarBrotli is a convenient archiver ready to use. -var DefaultTarBrotli = NewTarBrotli() diff --git a/tarbz2.go b/tarbz2.go deleted file mode 100644 index e5870a7d..00000000 --- a/tarbz2.go +++ /dev/null @@ -1,126 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/dsnet/compress/bzip2" -) - -// TarBz2 facilitates bzip2 compression -// (https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf) -// of tarball archives. -type TarBz2 struct { - *Tar - - CompressionLevel int -} - -// CheckExt ensures the file extension matches the format. -func (*TarBz2) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.bz2") && - !strings.HasSuffix(filename, ".tbz2") { - return fmt.Errorf("filename must have a .tar.bz2 or .tbz2 extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.bz2" or ".tbz2". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tbz2 *TarBz2) Archive(sources []string, destination string) error { - err := tbz2.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tbz2.wrapWriter() - return tbz2.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tbz2 *TarBz2) Unarchive(source, destination string) error { - tbz2.wrapReader() - return tbz2.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. 
-func (tbz2 *TarBz2) Walk(archive string, walkFn WalkFunc) error { - tbz2.wrapReader() - return tbz2.Tar.Walk(archive, walkFn) -} - -// Create opens tbz2 for writing a compressed -// tar archive to out. -func (tbz2 *TarBz2) Create(out io.Writer) error { - tbz2.wrapWriter() - return tbz2.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tbz2 *TarBz2) Open(in io.Reader, size int64) error { - tbz2.wrapReader() - return tbz2.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tbz2 *TarBz2) Extract(source, target, destination string) error { - tbz2.wrapReader() - return tbz2.Tar.Extract(source, target, destination) -} - -func (tbz2 *TarBz2) wrapWriter() { - var bz2w *bzip2.Writer - tbz2.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - bz2w, err = bzip2.NewWriter(w, &bzip2.WriterConfig{ - Level: tbz2.CompressionLevel, - }) - return bz2w, err - } - tbz2.Tar.cleanupWrapFn = func() { - bz2w.Close() - } -} - -func (tbz2 *TarBz2) wrapReader() { - var bz2r *bzip2.Reader - tbz2.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - bz2r, err = bzip2.NewReader(r, nil) - return bz2r, err - } - tbz2.Tar.cleanupWrapFn = func() { - bz2r.Close() - } -} - -func (tbz2 *TarBz2) String() string { return "tar.bz2" } - -// NewTarBz2 returns a new, default instance ready to be customized and used. -func NewTarBz2() *TarBz2 { - return &TarBz2{ - CompressionLevel: bzip2.DefaultCompression, - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarBz2)) - _ = Writer(new(TarBz2)) - _ = Archiver(new(TarBz2)) - _ = Unarchiver(new(TarBz2)) - _ = Walker(new(TarBz2)) - _ = Extractor(new(TarBz2)) -) - -// DefaultTarBz2 is a convenient archiver ready to use. 
-var DefaultTarBz2 = NewTarBz2() diff --git a/targz.go b/targz.go deleted file mode 100644 index 283fd01b..00000000 --- a/targz.go +++ /dev/null @@ -1,137 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/klauspost/compress/gzip" - "github.com/klauspost/pgzip" -) - -// TarGz facilitates gzip compression -// (RFC 1952) of tarball archives. -type TarGz struct { - *Tar - - // The compression level to use, as described - // in the compress/gzip package. - CompressionLevel int - - // Disables parallel gzip. - SingleThreaded bool -} - -// CheckExt ensures the file extension matches the format. -func (*TarGz) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.gz") && - !strings.HasSuffix(filename, ".tgz") { - return fmt.Errorf("filename must have a .tar.gz or .tgz extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.gz" or ".tgz". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tgz *TarGz) Archive(sources []string, destination string) error { - err := tgz.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tgz.wrapWriter() - return tgz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tgz *TarGz) Unarchive(source, destination string) error { - tgz.wrapReader() - return tgz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error { - tgz.wrapReader() - return tgz.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. 
-func (tgz *TarGz) Create(out io.Writer) error { - tgz.wrapWriter() - return tgz.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tgz *TarGz) Open(in io.Reader, size int64) error { - tgz.wrapReader() - return tgz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tgz *TarGz) Extract(source, target, destination string) error { - tgz.wrapReader() - return tgz.Tar.Extract(source, target, destination) -} - -func (tgz *TarGz) wrapWriter() { - var gzw io.WriteCloser - tgz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - if tgz.SingleThreaded { - gzw, err = gzip.NewWriterLevel(w, tgz.CompressionLevel) - } else { - gzw, err = pgzip.NewWriterLevel(w, tgz.CompressionLevel) - } - return gzw, err - } - tgz.Tar.cleanupWrapFn = func() { - gzw.Close() - } -} - -func (tgz *TarGz) wrapReader() { - var gzr io.ReadCloser - tgz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - if tgz.SingleThreaded { - gzr, err = gzip.NewReader(r) - } else { - gzr, err = pgzip.NewReader(r) - } - return gzr, err - } - tgz.Tar.cleanupWrapFn = func() { - gzr.Close() - } -} - -func (tgz *TarGz) String() string { return "tar.gz" } - -// NewTarGz returns a new, default instance ready to be customized and used. -func NewTarGz() *TarGz { - return &TarGz{ - CompressionLevel: gzip.DefaultCompression, - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarGz)) - _ = Writer(new(TarGz)) - _ = Archiver(new(TarGz)) - _ = Unarchiver(new(TarGz)) - _ = Walker(new(TarGz)) - _ = Extractor(new(TarGz)) -) - -// DefaultTarGz is a convenient archiver ready to use. 
-var DefaultTarGz = NewTarGz() diff --git a/tarlz4.go b/tarlz4.go deleted file mode 100644 index 42cbc90b..00000000 --- a/tarlz4.go +++ /dev/null @@ -1,129 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/pierrec/lz4/v4" -) - -// TarLz4 facilitates lz4 compression -// (https://github.com/lz4/lz4/tree/master/doc) -// of tarball archives. -type TarLz4 struct { - *Tar - - // The compression level to use when writing. - // Minimum 0 (fast compression), maximum 12 - // (most space savings). - CompressionLevel int -} - -// CheckExt ensures the file extension matches the format. -func (*TarLz4) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.lz4") && - !strings.HasSuffix(filename, ".tlz4") { - - return fmt.Errorf("filename must have a .tar.lz4 or .tlz4 extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.lz4" or ".tlz4". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tlz4 *TarLz4) Archive(sources []string, destination string) error { - err := tlz4.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tlz4.wrapWriter() - return tlz4.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tlz4 *TarLz4) Unarchive(source, destination string) error { - tlz4.wrapReader() - return tlz4.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tlz4 *TarLz4) Walk(archive string, walkFn WalkFunc) error { - tlz4.wrapReader() - return tlz4.Tar.Walk(archive, walkFn) -} - -// Create opens tlz4 for writing a compressed -// tar archive to out. 
-func (tlz4 *TarLz4) Create(out io.Writer) error { - tlz4.wrapWriter() - return tlz4.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tlz4 *TarLz4) Open(in io.Reader, size int64) error { - tlz4.wrapReader() - return tlz4.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tlz4 *TarLz4) Extract(source, target, destination string) error { - tlz4.wrapReader() - return tlz4.Tar.Extract(source, target, destination) -} - -func (tlz4 *TarLz4) wrapWriter() { - var lz4w *lz4.Writer - tlz4.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - lz4w = lz4.NewWriter(w) - // TODO archiver v4: use proper lz4.Fast - // bitshifting for backwards compatibility with lz4/v3 - options := []lz4.Option{ - lz4.CompressionLevelOption(lz4.CompressionLevel(1 << (8 + tlz4.CompressionLevel))), - } - if err := lz4w.Apply(options...); err != nil { - return lz4w, err - } - return lz4w, nil - } - tlz4.Tar.cleanupWrapFn = func() { - lz4w.Close() - } -} - -func (tlz4 *TarLz4) wrapReader() { - tlz4.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - return lz4.NewReader(r), nil - } -} - -func (tlz4 *TarLz4) String() string { return "tar.lz4" } - -// NewTarLz4 returns a new, default instance ready to be customized and used. -func NewTarLz4() *TarLz4 { - return &TarLz4{ - CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48 - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarLz4)) - _ = Writer(new(TarLz4)) - _ = Archiver(new(TarLz4)) - _ = Unarchiver(new(TarLz4)) - _ = Walker(new(TarLz4)) - _ = Extractor(new(TarLz4)) -) - -// DefaultTarLz4 is a convenient archiver ready to use. 
-var DefaultTarLz4 = NewTarLz4() diff --git a/tarsz.go b/tarsz.go deleted file mode 100644 index ee3808e6..00000000 --- a/tarsz.go +++ /dev/null @@ -1,114 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/golang/snappy" -) - -// TarSz facilitates Snappy compression -// (https://github.com/google/snappy) -// of tarball archives. -type TarSz struct { - *Tar -} - -// CheckExt ensures the file extension matches the format. -func (*TarSz) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.sz") && - !strings.HasSuffix(filename, ".tsz") { - return fmt.Errorf("filename must have a .tar.sz or .tsz extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.sz" or ".tsz". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (tsz *TarSz) Archive(sources []string, destination string) error { - err := tsz.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tsz.wrapWriter() - return tsz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tsz *TarSz) Unarchive(source, destination string) error { - tsz.wrapReader() - return tsz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tsz *TarSz) Walk(archive string, walkFn WalkFunc) error { - tsz.wrapReader() - return tsz.Tar.Walk(archive, walkFn) -} - -// Create opens tsz for writing a compressed -// tar archive to out. -func (tsz *TarSz) Create(out io.Writer) error { - tsz.wrapWriter() - return tsz.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. 
-func (tsz *TarSz) Open(in io.Reader, size int64) error { - tsz.wrapReader() - return tsz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. -func (tsz *TarSz) Extract(source, target, destination string) error { - tsz.wrapReader() - return tsz.Tar.Extract(source, target, destination) -} - -func (tsz *TarSz) wrapWriter() { - var sw *snappy.Writer - tsz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - sw = snappy.NewBufferedWriter(w) - return sw, nil - } - tsz.Tar.cleanupWrapFn = func() { - sw.Close() - } -} - -func (tsz *TarSz) wrapReader() { - tsz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - return snappy.NewReader(r), nil - } -} - -func (tsz *TarSz) String() string { return "tar.sz" } - -// NewTarSz returns a new, default instance ready to be customized and used. -func NewTarSz() *TarSz { - return &TarSz{ - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarSz)) - _ = Writer(new(TarSz)) - _ = Archiver(new(TarSz)) - _ = Unarchiver(new(TarSz)) - _ = Walker(new(TarSz)) - _ = Extractor(new(TarSz)) -) - -// DefaultTarSz is a convenient archiver ready to use. -var DefaultTarSz = NewTarSz() diff --git a/tarxz.go b/tarxz.go deleted file mode 100644 index 5679a067..00000000 --- a/tarxz.go +++ /dev/null @@ -1,119 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/ulikunitz/xz" - fastxz "github.com/xi2/xz" -) - -// TarXz facilitates xz compression -// (https://tukaani.org/xz/format.html) -// of tarball archives. -type TarXz struct { - *Tar -} - -// CheckExt ensures the file extension matches the format. 
-func (*TarXz) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.xz") && - !strings.HasSuffix(filename, ".txz") { - return fmt.Errorf("filename must have a .tar.xz or .txz extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.xz" or ".txz". File paths can be -// those of regular files or directories; directories will -// be recursively added. -func (txz *TarXz) Archive(sources []string, destination string) error { - err := txz.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - txz.wrapWriter() - return txz.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (txz *TarXz) Unarchive(source, destination string) error { - txz.wrapReader() - return txz.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (txz *TarXz) Walk(archive string, walkFn WalkFunc) error { - txz.wrapReader() - return txz.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. -func (txz *TarXz) Create(out io.Writer) error { - txz.wrapWriter() - return txz.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (txz *TarXz) Open(in io.Reader, size int64) error { - txz.wrapReader() - return txz.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (txz *TarXz) Extract(source, target, destination string) error { - txz.wrapReader() - return txz.Tar.Extract(source, target, destination) -} - -func (txz *TarXz) wrapWriter() { - var xzw *xz.Writer - txz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - xzw, err = xz.NewWriter(w) - return xzw, err - } - txz.Tar.cleanupWrapFn = func() { - xzw.Close() - } -} - -func (txz *TarXz) wrapReader() { - var xzr *fastxz.Reader - txz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - xzr, err = fastxz.NewReader(r, 0) - return xzr, err - } -} - -func (txz *TarXz) String() string { return "tar.xz" } - -// NewTarXz returns a new, default instance ready to be customized and used. -func NewTarXz() *TarXz { - return &TarXz{ - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarXz)) - _ = Writer(new(TarXz)) - _ = Archiver(new(TarXz)) - _ = Unarchiver(new(TarXz)) - _ = Walker(new(TarXz)) - _ = Extractor(new(TarXz)) -) - -// DefaultTarXz is a convenient archiver ready to use. -var DefaultTarXz = NewTarXz() diff --git a/tarzst.go b/tarzst.go deleted file mode 100644 index 3b2fe431..00000000 --- a/tarzst.go +++ /dev/null @@ -1,120 +0,0 @@ -package archiver - -import ( - "fmt" - "io" - "strings" - - "github.com/klauspost/compress/zstd" -) - -// TarZstd facilitates Zstandard compression -// (RFC 8478) of tarball archives. -type TarZstd struct { - *Tar -} - -// CheckExt ensures the file extension matches the format. -func (*TarZstd) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".tar.zst") { - return fmt.Errorf("filename must have a .tar.zst extension") - } - return nil -} - -// Archive creates a compressed tar file at destination -// containing the files listed in sources. The destination -// must end with ".tar.zst" or ".tzst". File paths can be -// those of regular files or directories; directories will -// be recursively added. 
-func (tzst *TarZstd) Archive(sources []string, destination string) error { - err := tzst.CheckExt(destination) - if err != nil { - return fmt.Errorf("output %s", err.Error()) - } - tzst.wrapWriter() - return tzst.Tar.Archive(sources, destination) -} - -// Unarchive unpacks the compressed tarball at -// source to destination. Destination will be -// treated as a folder name. -func (tzst *TarZstd) Unarchive(source, destination string) error { - tzst.wrapReader() - return tzst.Tar.Unarchive(source, destination) -} - -// Walk calls walkFn for each visited item in archive. -func (tzst *TarZstd) Walk(archive string, walkFn WalkFunc) error { - tzst.wrapReader() - return tzst.Tar.Walk(archive, walkFn) -} - -// Create opens txz for writing a compressed -// tar archive to out. -func (tzst *TarZstd) Create(out io.Writer) error { - tzst.wrapWriter() - return tzst.Tar.Create(out) -} - -// Open opens t for reading a compressed archive from -// in. The size parameter is not used. -func (tzst *TarZstd) Open(in io.Reader, size int64) error { - tzst.wrapReader() - return tzst.Tar.Open(in, size) -} - -// Extract extracts a single file from the tar archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (tzst *TarZstd) Extract(source, target, destination string) error { - tzst.wrapReader() - return tzst.Tar.Extract(source, target, destination) -} - -func (tzst *TarZstd) wrapWriter() { - var zstdw *zstd.Encoder - tzst.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) { - var err error - zstdw, err = zstd.NewWriter(w) - return zstdw, err - } - tzst.Tar.cleanupWrapFn = func() { - zstdw.Close() - } -} - -func (tzst *TarZstd) wrapReader() { - var zstdr *zstd.Decoder - tzst.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) { - var err error - zstdr, err = zstd.NewReader(r) - return zstdr, err - } - tzst.Tar.cleanupWrapFn = func() { - zstdr.Close() - } -} - -func (tzst *TarZstd) String() string { return "tar.zst" } - -// NewTarZstd returns a new, default instance ready to be customized and used. -func NewTarZstd() *TarZstd { - return &TarZstd{ - Tar: NewTar(), - } -} - -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(TarZstd)) - _ = Writer(new(TarZstd)) - _ = Archiver(new(TarZstd)) - _ = Unarchiver(new(TarZstd)) - _ = Walker(new(TarZstd)) - _ = ExtensionChecker(new(TarZstd)) - _ = Extractor(new(TarZstd)) -) - -// DefaultTarZstd is a convenient archiver ready to use. -var DefaultTarZstd = NewTarZstd() diff --git a/testdata/corpus/already-compressed.jpg b/testdata/corpus/already-compressed.jpg deleted file mode 100644 index 3d599f80..00000000 Binary files a/testdata/corpus/already-compressed.jpg and /dev/null differ diff --git a/testdata/corpus/proverbs/extra/proverb3.txt b/testdata/corpus/proverbs/extra/proverb3.txt deleted file mode 100644 index 4a4768d4..00000000 --- a/testdata/corpus/proverbs/extra/proverb3.txt +++ /dev/null @@ -1,2 +0,0 @@ -"interface{} says nothing." 
- - Rob Pike \ No newline at end of file diff --git a/testdata/corpus/proverbs/proverb1.txt b/testdata/corpus/proverbs/proverb1.txt deleted file mode 100644 index 88da02ef..00000000 --- a/testdata/corpus/proverbs/proverb1.txt +++ /dev/null @@ -1,2 +0,0 @@ -"Channels orchestrate; mutexes serialize." - - Rob Pike \ No newline at end of file diff --git a/testdata/corpus/proverbs/proverb2.txt b/testdata/corpus/proverbs/proverb2.txt deleted file mode 100644 index 8e075027..00000000 --- a/testdata/corpus/proverbs/proverb2.txt +++ /dev/null @@ -1,2 +0,0 @@ -"A little copying is better than a little dependency." - - Rob Pike \ No newline at end of file diff --git a/testdata/corpus/quote1.txt b/testdata/corpus/quote1.txt deleted file mode 100644 index 1c34480d..00000000 --- a/testdata/corpus/quote1.txt +++ /dev/null @@ -1,2 +0,0 @@ -"Go has generics; they're called interfaces." - - Matt Holt \ No newline at end of file diff --git a/testdata/create-evil-tar.go b/testdata/create-evil-tar.go deleted file mode 100644 index 46c01eeb..00000000 --- a/testdata/create-evil-tar.go +++ /dev/null @@ -1,74 +0,0 @@ -package main - -import ( - "archive/tar" - "fmt" - "log" - "os" - "time" -) - -func main() { - // Create a file to write our archive to. - tarname := "double-evil.tar" - fw, err := os.Create(tarname) - if nil != err { - log.Fatal(err) - return - } - - // Create a new tar archive. - tw := tar.NewWriter(fw) - - // Write the evil symlink, it points outside of the target directory - hdr := &tar.Header{ - Name: "bad/file.txt", - Mode: 0644, - Typeflag: tar.TypeSymlink, - Linkname: "../../badfile.txt", - ModTime: time.Now(), - } - if err := tw.WriteHeader(hdr); err != nil { - log.Fatal(err) - return - } - - // Write safe files to the archive. 
- var files = []struct { - Name, Body string - }{ - {"goodfile.txt", "hello world"}, - {"morefile.txt", "hello world"}, - {"bad/file.txt", "Mwa-ha-ha"}, - } - for _, file := range files { - hdr := &tar.Header{ - Name: file.Name, - Mode: 0644, - Size: int64(len(file.Body)), - ModTime: time.Now(), - } - - if err := tw.WriteHeader(hdr); err != nil { - log.Fatal(err) - return - } - - if _, err := tw.Write([]byte(file.Body)); err != nil { - log.Fatal(err) - } - } - - // Close the in-memory archive so that it writes trailing data - err = tw.Close() - if err != nil { - log.Fatal(err) - } - fmt.Printf("Wrote %s\n", tarname) - - // close the on-disk archive so that it flushes all bytes - if err = fw.Close(); err != nil { - log.Fatal(err) - return - } -} diff --git a/testdata/create-evil-zip.go b/testdata/create-evil-zip.go deleted file mode 100644 index 197b6475..00000000 --- a/testdata/create-evil-zip.go +++ /dev/null @@ -1,75 +0,0 @@ -package main - -import ( - "log" - "os" - "time" - - "github.com/klauspost/compress/zip" -) - -func main() { - // Create a buffer to write our archive to. - fw, err := os.Create("double-evil.zip") - if nil != err { - log.Fatal(err) - return - } - - // Create a new zip archive. - w := zip.NewWriter(fw) - - // Write the evil symlink - h := &zip.FileHeader{ - Name: "bad/file.txt", - Method: zip.Deflate, - Modified: time.Now(), - } - h.SetMode(os.ModeSymlink) - header, err := w.CreateHeader(h) - if err != nil { - log.Fatal(err) - } - // The evil symlink points outside of the target directory - _, err = header.Write([]byte("../../badfile.txt")) - if err != nil { - log.Fatal(err) - } - - // Write safe files to the archive. 
- var files = []struct { - Name, Body string - }{ - {"goodfile.txt", "hello world"}, - {"morefile.txt", "hello world"}, - {"bad/file.txt", "Mwa-ha-ha"}, - } - for _, file := range files { - h := &zip.FileHeader{ - Name: file.Name, - Method: zip.Deflate, - Modified: time.Now(), - } - - header, err := w.CreateHeader(h) - if err != nil { - log.Fatal(err) - } - - _, err = header.Write([]byte(file.Body)) - if err != nil { - log.Fatal(err) - } - } - - // close the in-memory archive so that it writes trailing data - if err = w.Close(); err != nil { - log.Fatal(err) - } - - // close the on-disk archive so that it flushes all bytes - if err = fw.Close(); err != nil { - log.Fatal(err) - return - } -} diff --git a/testdata/gnu-hardlinks.tar b/testdata/gnu-hardlinks.tar deleted file mode 100644 index 25fffda3..00000000 Binary files a/testdata/gnu-hardlinks.tar and /dev/null differ diff --git a/testdata/sample.rar b/testdata/sample.rar deleted file mode 100644 index 3cc7c33b..00000000 Binary files a/testdata/sample.rar and /dev/null differ diff --git a/testdata/testarchives/evilarchives/double-evil.tar b/testdata/testarchives/evilarchives/double-evil.tar deleted file mode 100644 index 5ca66cc2..00000000 Binary files a/testdata/testarchives/evilarchives/double-evil.tar and /dev/null differ diff --git a/testdata/testarchives/evilarchives/double-evil.zip b/testdata/testarchives/evilarchives/double-evil.zip deleted file mode 100644 index afd7ba3a..00000000 Binary files a/testdata/testarchives/evilarchives/double-evil.zip and /dev/null differ diff --git a/testdata/testarchives/evilarchives/evil.tar b/testdata/testarchives/evilarchives/evil.tar deleted file mode 100644 index 9fe114ba..00000000 Binary files a/testdata/testarchives/evilarchives/evil.tar and /dev/null differ diff --git a/testdata/testarchives/evilarchives/evil.tar.bz2 b/testdata/testarchives/evilarchives/evil.tar.bz2 deleted file mode 100644 index 4b979e61..00000000 Binary files 
a/testdata/testarchives/evilarchives/evil.tar.bz2 and /dev/null differ diff --git a/testdata/testarchives/evilarchives/evil.tar.gz b/testdata/testarchives/evilarchives/evil.tar.gz deleted file mode 100644 index 6a68871a..00000000 Binary files a/testdata/testarchives/evilarchives/evil.tar.gz and /dev/null differ diff --git a/testdata/testarchives/evilarchives/evil.zip b/testdata/testarchives/evilarchives/evil.zip deleted file mode 100644 index 665e1856..00000000 Binary files a/testdata/testarchives/evilarchives/evil.zip and /dev/null differ diff --git a/xz.go b/xz.go index c60d5eae..eaf43471 100644 --- a/xz.go +++ b/xz.go @@ -1,58 +1,53 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" + fastxz "github.com/therootcompany/xz" "github.com/ulikunitz/xz" - fastxz "github.com/xi2/xz" ) -// Xz facilitates XZ compression. +func init() { + RegisterFormat(Xz{}) +} + +// Xz facilitates xz compression. type Xz struct{} -// Compress reads in, compresses it, and writes it to out. -func (x *Xz) Compress(in io.Reader, out io.Writer) error { - w, err := xz.NewWriter(out) - if err != nil { - return err - } - defer w.Close() - _, err = io.Copy(w, in) - return err -} +func (Xz) Name() string { return ".xz" } -// Decompress reads in, decompresses it, and writes it to out. -func (x *Xz) Decompress(in io.Reader, out io.Writer) error { - r, err := fastxz.NewReader(in, 0) - if err != nil { - return err +func (x Xz) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), x.Name()) { + mr.ByName = true } - _, err = io.Copy(out, r) - return err -} -// CheckExt ensures the file extension matches the format. 
-func (x *Xz) CheckExt(filename string) error { - if filepath.Ext(filename) != ".xz" { - return fmt.Errorf("filename must have a .xz extension") + // match file header + buf := make([]byte, len(xzHeader)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } - return nil -} + mr.ByStream = bytes.Equal(buf, xzHeader) -func (x *Xz) String() string { return "xz" } + return mr, nil +} -// NewXz returns a new, default instance ready to be customized and used. -func NewXz() *Xz { - return new(Xz) +func (Xz) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return xz.NewWriter(w) } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Xz)) - _ = Decompressor(new(Xz)) -) +func (Xz) OpenReader(r io.Reader) (io.ReadCloser, error) { + xr, err := fastxz.NewReader(r, 0) + if err != nil { + return nil, err + } + return io.NopCloser(xr), err +} -// DefaultXz is a default instance that is conveniently ready to use. -var DefaultXz = NewXz() +// magic number at the beginning of xz files; see section 2.1.1.1 +// of https://tukaani.org/xz/xz-file-format.txt +var xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} diff --git a/zip.go b/zip.go index c6af8efb..cc66e9b9 100644 --- a/zip.go +++ b/zip.go @@ -1,677 +1,267 @@ package archiver import ( + "archive/zip" "bytes" - "compress/flate" + "context" + "errors" "fmt" "io" - "io/ioutil" - "log" - "os" + "io/fs" "path" - "path/filepath" "strings" "github.com/dsnet/compress/bzip2" - "github.com/klauspost/compress/zip" "github.com/klauspost/compress/zstd" "github.com/ulikunitz/xz" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/charmap" + "golang.org/x/text/encoding/japanese" + "golang.org/x/text/encoding/korean" + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/encoding/traditionalchinese" + "golang.org/x/text/encoding/unicode" ) -// ZipCompressionMethod Compression type -type ZipCompressionMethod uint16 +func init() { + RegisterFormat(Zip{}) 
-// Compression methods. -// see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. -// Note LZMA: Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 archives made this way - and vice versa. -const ( - Store ZipCompressionMethod = 0 - Deflate ZipCompressionMethod = 8 - BZIP2 ZipCompressionMethod = 12 - LZMA ZipCompressionMethod = 14 - ZSTD ZipCompressionMethod = 93 - XZ ZipCompressionMethod = 95 -) - -// Zip provides facilities for operating ZIP archives. -// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT. -type Zip struct { - // The compression level to use, as described - // in the compress/flate package. - CompressionLevel int - - // Whether to overwrite existing files; if false, - // an error is returned if the file exists. - OverwriteExisting bool - - // Whether to make all the directories necessary - // to create a zip archive in the desired path. - MkdirAll bool - - // If enabled, selective compression will only - // compress files which are not already in a - // compressed format; this is decided based - // simply on file extension. - SelectiveCompression bool - - // A single top-level folder can be implicitly - // created by the Archive or Unarchive methods - // if the files to be added to the archive - // or the files to be extracted from the archive - // do not all have a common root. This roughly - // mimics the behavior of archival tools integrated - // into OS file browsers which create a subfolder - // to avoid unexpectedly littering the destination - // folder with potentially many files, causing a - // problematic cleanup/organization situation. - // This feature is available for both creation - // and extraction of archives, but may be slightly - // inefficient with lots and lots of files, - // especially on extraction. - ImplicitTopLevelFolder bool - - // Strip number of leading paths. This feature is available - // only during unpacking of the entire archive. 
- StripComponents int - - // If true, errors encountered during reading - // or writing a single file will be logged and - // the operation will continue on remaining files. - ContinueOnError bool - - // Compression algorithm - FileMethod ZipCompressionMethod - zw *zip.Writer - zr *zip.Reader - ridx int - //decinitialized bool -} - -// CheckExt ensures the file extension matches the format. -func (*Zip) CheckExt(filename string) error { - if !strings.HasSuffix(filename, ".zip") { - return fmt.Errorf("filename must have a .zip extension") - } - return nil -} + // TODO: What about custom flate levels too + zip.RegisterCompressor(ZipMethodBzip2, func(out io.Writer) (io.WriteCloser, error) { + return bzip2.NewWriter(out, &bzip2.WriterConfig{ /*TODO: Level: z.CompressionLevel*/ }) + }) + zip.RegisterCompressor(ZipMethodZstd, func(out io.Writer) (io.WriteCloser, error) { + return zstd.NewWriter(out) + }) + zip.RegisterCompressor(ZipMethodXz, func(out io.Writer) (io.WriteCloser, error) { + return xz.NewWriter(out) + }) -// Registering a global decompressor is not reentrant and may panic -func registerDecompressor(zr *zip.Reader) { - // register zstd decompressor - zr.RegisterDecompressor(uint16(ZSTD), func(r io.Reader) io.ReadCloser { - zr, err := zstd.NewReader(r) + zip.RegisterDecompressor(ZipMethodBzip2, func(r io.Reader) io.ReadCloser { + bz2r, err := bzip2.NewReader(r, nil) if err != nil { return nil } - return zr.IOReadCloser() + return bz2r }) - zr.RegisterDecompressor(uint16(BZIP2), func(r io.Reader) io.ReadCloser { - bz2r, err := bzip2.NewReader(r, nil) + zip.RegisterDecompressor(ZipMethodZstd, func(r io.Reader) io.ReadCloser { + zr, err := zstd.NewReader(r) if err != nil { return nil } - return bz2r + return zr.IOReadCloser() }) - zr.RegisterDecompressor(uint16(XZ), func(r io.Reader) io.ReadCloser { + zip.RegisterDecompressor(ZipMethodXz, func(r io.Reader) io.ReadCloser { xr, err := xz.NewReader(r) if err != nil { return nil } - return ioutil.NopCloser(xr) + 
return io.NopCloser(xr) }) } -// CheckPath ensures the file extension matches the format. -func (*Zip) CheckPath(to, filename string) error { - to, _ = filepath.Abs(to) //explicit the destination folder to prevent that 'string.HasPrefix' check can be 'bypassed' when no destination folder is supplied in input - dest := filepath.Join(to, filename) - //prevent path traversal attacks - if !strings.HasPrefix(dest, to) { - return &IllegalPathError{AbsolutePath: dest, Filename: filename} - } - return nil -} - -// Archive creates a .zip file at destination containing -// the files listed in sources. The destination must end -// with ".zip". File paths can be those of regular files -// or directories. Regular files are stored at the 'root' -// of the archive, and directories are recursively added. -func (z *Zip) Archive(sources []string, destination string) error { - err := z.CheckExt(destination) - if err != nil { - return fmt.Errorf("checking extension: %v", err) - } - if !z.OverwriteExisting && fileExists(destination) { - return fmt.Errorf("file already exists: %s", destination) - } - - // make the folder to contain the resulting archive - // if it does not already exist - destDir := filepath.Dir(destination) - if z.MkdirAll && !fileExists(destDir) { - err := mkdir(destDir, 0755) - if err != nil { - return fmt.Errorf("making folder for destination: %v", err) - } - } - - out, err := os.Create(destination) - if err != nil { - return fmt.Errorf("creating %s: %v", destination, err) - } - defer out.Close() - - err = z.Create(out) - if err != nil { - return fmt.Errorf("creating zip: %v", err) - } - defer z.Close() +type Zip struct { + // Only compress files which are not already in a + // compressed format (determined simply by examining + // file extension). 
+ SelectiveCompression bool - var topLevelFolder string - if z.ImplicitTopLevelFolder && multipleTopLevels(sources) { - topLevelFolder = folderNameFromFileName(destination) - } + // The method or algorithm for compressing stored files. + Compression uint16 - for _, source := range sources { - err := z.writeWalk(source, topLevelFolder, destination) - if err != nil { - return fmt.Errorf("walking %s: %v", source, err) - } - } + // If true, errors encountered during reading or writing + // a file within an archive will be logged and the + // operation will continue on remaining files. + ContinueOnError bool - return nil + // For files in zip archives that do not have UTF-8 + // encoded filenames and comments, specify the character + // encoding here. + TextEncoding string } -// Unarchive unpacks the .zip file at source to destination. -// Destination will be treated as a folder name. -func (z *Zip) Unarchive(source, destination string) error { - if !fileExists(destination) && z.MkdirAll { - err := mkdir(destination, 0755) - if err != nil { - return fmt.Errorf("preparing destination: %v", err) - } - } - - file, err := os.Open(source) - if err != nil { - return fmt.Errorf("opening source file: %v", err) - } - defer file.Close() +func (z Zip) Name() string { return ".zip" } - fileInfo, err := file.Stat() - if err != nil { - return fmt.Errorf("statting source file: %v", err) - } +func (z Zip) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult - err = z.Open(file, fileInfo.Size()) - if err != nil { - return fmt.Errorf("opening zip archive for reading: %v", err) - } - defer z.Close() - - // if the files in the archive do not all share a common - // root, then make sure we extract to a single subfolder - // rather than potentially littering the destination... 
- if z.ImplicitTopLevelFolder { - files := make([]string, len(z.zr.File)) - for i := range z.zr.File { - files[i] = z.zr.File[i].Name - } - if multipleTopLevels(files) { - destination = filepath.Join(destination, folderNameFromFileName(source)) - } + // match filename + if strings.Contains(strings.ToLower(filename), z.Name()) { + mr.ByName = true } - for { - err := z.extractNext(destination) - if err == io.EOF { - break - } - if err != nil { - if z.ContinueOnError || IsIllegalPathError(err) { - log.Printf("[ERROR] Reading file in zip archive: %v", err) - continue - } - return fmt.Errorf("reading file in zip archive: %v", err) - } + // match file header + buf := make([]byte, len(zipHeader)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } + mr.ByStream = bytes.Equal(buf, zipHeader) - return nil + return mr, nil } -func (z *Zip) extractNext(to string) error { - f, err := z.Read() - if err != nil { - return err // don't wrap error; calling loop must break on io.EOF - } - defer f.Close() - - header, ok := f.Header.(zip.FileHeader) - if !ok { - return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) +func (z Zip) Archive(ctx context.Context, output io.Writer, files []File) error { + if ctx == nil { + ctx = context.Background() } - errPath := z.CheckPath(to, header.Name) - if errPath != nil { - return fmt.Errorf("checking path traversal attempt: %v", errPath) - } + zw := zip.NewWriter(output) + defer zw.Close() - if z.StripComponents > 0 { - if strings.Count(header.Name, "/") < z.StripComponents { - return nil // skip path with fewer components + for i, file := range files { + if err := ctx.Err(); err != nil { + return err // honor context cancellation } - for i := 0; i < z.StripComponents; i++ { - slash := strings.Index(header.Name, "/") - header.Name = header.Name[slash+1:] - } - } - return z.extractFile(f, to, &header) -} - -func (z *Zip) extractFile(f File, to string, header *zip.FileHeader) error { - to = 
filepath.Join(to, header.Name) - - // if a directory, no content; simply make the directory and return - if f.IsDir() { - return mkdir(to, f.Mode()) - } - - // do not overwrite existing files, if configured - if !z.OverwriteExisting && fileExists(to) { - return fmt.Errorf("file already exists: %s", to) - } - - // extract symbolic links as symbolic links - if isSymlink(header.FileInfo()) { - // symlink target is the contents of the file - buf := new(bytes.Buffer) - _, err := io.Copy(buf, f) + hdr, err := zip.FileInfoHeader(file) if err != nil { - return fmt.Errorf("%s: reading symlink target: %v", header.Name, err) + return fmt.Errorf("getting info for file %d: %s: %w", i, file.Name(), err) } - return writeNewSymbolicLink(to, strings.TrimSpace(buf.String())) - } - - return writeNewFile(to, f, f.Mode()) -} - -func (z *Zip) writeWalk(source, topLevelFolder, destination string) error { - sourceInfo, err := os.Stat(source) - if err != nil { - return fmt.Errorf("%s: stat: %v", source, err) - } - destAbs, err := filepath.Abs(destination) - if err != nil { - return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err) - } - return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error { - handleErr := func(err error) error { - if z.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", fpath, err) - return nil + // customize header based on file properties + if file.IsDir() { + hdr.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... 
+ hdr.Method = zip.Store + } else if z.SelectiveCompression { + // only enable compression on compressable files + ext := strings.ToLower(path.Ext(hdr.Name)) + if _, ok := compressedFormats[ext]; ok { + hdr.Method = zip.Store + } else { + hdr.Method = z.Compression } - return err - } - if err != nil { - return handleErr(fmt.Errorf("traversing %s: %v", fpath, err)) - } - if info == nil { - return handleErr(fmt.Errorf("%s: no file info", fpath)) } - // make sure we do not copy the output file into the output - // file; that results in an infinite loop and disk exhaustion! - fpathAbs, err := filepath.Abs(fpath) + w, err := zw.CreateHeader(hdr) if err != nil { - return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err)) - } - if within(fpathAbs, destAbs) { - return nil + return fmt.Errorf("creating header for file %d: %s: %w", i, file.Name(), err) } - // build the name to be used within the archive - nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath) - if err != nil { - return handleErr(err) + // directories have no file body + if file.IsDir() { + continue } - - var file io.ReadCloser - if info.Mode().IsRegular() { - file, err = os.Open(fpath) - if err != nil { - return handleErr(fmt.Errorf("%s: opening: %v", fpath, err)) - } - defer file.Close() + if err := openAndCopyFile(file, w); err != nil { + return fmt.Errorf("writing file %d: %s: %w", i, file.Name(), err) } - err = z.Write(File{ - FileInfo: FileInfo{ - FileInfo: info, - CustomName: nameInArchive, - SourcePath: fpath, - }, - ReadCloser: file, - }) - if err != nil { - return handleErr(fmt.Errorf("%s: writing: %s", fpath, err)) - } - - return nil - }) -} - -// Create opens z for writing a ZIP archive to out. 
-func (z *Zip) Create(out io.Writer) error { - if z.zw != nil { - return fmt.Errorf("zip archive is already created for writing") - } - z.zw = zip.NewWriter(out) - if z.CompressionLevel != flate.DefaultCompression { - z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) { - return flate.NewWriter(out, z.CompressionLevel) - }) - } - switch z.FileMethod { - case BZIP2: - z.zw.RegisterCompressor(uint16(BZIP2), func(out io.Writer) (io.WriteCloser, error) { - return bzip2.NewWriter(out, &bzip2.WriterConfig{Level: z.CompressionLevel}) - }) - case ZSTD: - z.zw.RegisterCompressor(uint16(ZSTD), func(out io.Writer) (io.WriteCloser, error) { - return zstd.NewWriter(out) - }) - case XZ: - z.zw.RegisterCompressor(uint16(XZ), func(out io.Writer) (io.WriteCloser, error) { - return xz.NewWriter(out) - }) } + return nil } -// Write writes f to z, which must have been opened for writing first. -func (z *Zip) Write(f File) error { - if z.zw == nil { - return fmt.Errorf("zip archive was not created for writing first") - } - if f.FileInfo == nil { - return fmt.Errorf("no file info") - } - if f.FileInfo.Name() == "" { - return fmt.Errorf("missing file name") +func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error { + if ctx == nil { + ctx = context.Background() } - header, err := zip.FileInfoHeader(f) - if err != nil { - return fmt.Errorf("%s: getting header: %v", f.Name(), err) + sra, ok := sourceArchive.(seekReaderAt) + if !ok { + return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints") } - if f.IsDir() { - header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc... 
- header.Method = zip.Store - } else { - ext := strings.ToLower(path.Ext(header.Name)) - if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression { - header.Method = zip.Store - } else { - header.Method = uint16(z.FileMethod) - } + size, err := streamSizeBySeeking(sra) + if err != nil { + return fmt.Errorf("determining stream size: %w", err) } - writer, err := z.zw.CreateHeader(header) + zr, err := zip.NewReader(sra, size) if err != nil { - return fmt.Errorf("%s: making header: %w", f.Name(), err) + return err } - return z.writeFile(f, writer) -} + // important to initialize to non-nil, empty value due to how fileIsIncluded works + skipDirs := skipList{} -func (z *Zip) writeFile(f File, writer io.Writer) error { - if f.IsDir() { - return nil // directories have no contents - } - if isSymlink(f) { - fi, ok := f.FileInfo.(FileInfo) - if !ok { - return fmt.Errorf("failed to cast fs.FileInfo to archiver.FileInfo: %v", f) + for i, f := range zr.File { + if err := ctx.Err(); err != nil { + return err // honor context cancellation } - // file body for symlinks is the symlink target - linkTarget, err := os.Readlink(fi.SourcePath) - if err != nil { - return fmt.Errorf("%s: readlink: %v", fi.SourcePath, err) + if !fileIsIncluded(pathsInArchive, f.Name) { + continue } - _, err = writer.Write([]byte(filepath.ToSlash(linkTarget))) - if err != nil { - return fmt.Errorf("%s: writing symlink target: %v", fi.SourcePath, err) + if fileIsIncluded(skipDirs, f.Name) { + continue } - return nil - } - - if f.ReadCloser == nil { - return fmt.Errorf("%s: no way to read file contents", f.Name()) - } - _, err := io.Copy(writer, f) - if err != nil { - return fmt.Errorf("%s: copying contents: %w", f.Name(), err) - } - - return nil -} - -// Open opens z for reading an archive from in, -// which is expected to have the given size and -// which must be an io.ReaderAt. 
-func (z *Zip) Open(in io.Reader, size int64) error { - inRdrAt, ok := in.(io.ReaderAt) - if !ok { - return fmt.Errorf("reader must be io.ReaderAt") - } - if z.zr != nil { - return fmt.Errorf("zip archive is already open for reading") - } - var err error - z.zr, err = zip.NewReader(inRdrAt, size) - if err != nil { - return fmt.Errorf("creating reader: %v", err) - } - registerDecompressor(z.zr) - z.ridx = 0 - return nil -} - -// Read reads the next file from z, which must have -// already been opened for reading. If there are no -// more files, the error is io.EOF. The File must -// be closed when finished reading from it. -func (z *Zip) Read() (File, error) { - if z.zr == nil { - return File{}, fmt.Errorf("zip archive is not open") - } - if z.ridx >= len(z.zr.File) { - return File{}, io.EOF - } - - // access the file and increment counter so that - // if there is an error processing this file, the - // caller can still iterate to the next file - zf := z.zr.File[z.ridx] - z.ridx++ - file := File{ - FileInfo: zf.FileInfo(), - Header: zf.FileHeader, - } - - rc, err := zf.Open() - if err != nil { - return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err) - } - file.ReadCloser = rc - - return file, nil -} - -// Close closes the zip archive(s) opened by Create and Open. -func (z *Zip) Close() error { - if z.zr != nil { - z.zr = nil - } - if z.zw != nil { - zw := z.zw - z.zw = nil - return zw.Close() - } - return nil -} + // ensure filename and comment are UTF-8 encoded (issue #147) + z.decodeText(&f.FileHeader) -// Walk calls walkFn for each visited item in archive. 
-func (z *Zip) Walk(archive string, walkFn WalkFunc) error { - zr, err := zip.OpenReader(archive) - if err != nil { - return fmt.Errorf("opening zip reader: %v", err) - } - defer zr.Close() - registerDecompressor(&zr.Reader) - for _, zf := range zr.File { - zfrc, err := zf.Open() - if err != nil { - if zfrc != nil { - zfrc.Close() - } - if z.ContinueOnError { - log.Printf("[ERROR] Opening %s: %v", zf.Name, err) - continue - } - return fmt.Errorf("opening %s: %v", zf.Name, err) + file := File{ + FileInfo: f.FileInfo(), + Header: f.FileHeader, + NameInArchive: f.Name, + Open: func() (io.ReadCloser, error) { return f.Open() }, } - err = walkFn(File{ - FileInfo: zf.FileInfo(), - Header: zf.FileHeader, - ReadCloser: zfrc, - }) - zfrc.Close() - if err != nil { - if err == ErrStopWalk { - break + err := handleFile(ctx, file) + if errors.Is(err, fs.SkipDir) { + // if a directory, skip this path; if a file, skip the folder path + dirPath := f.Name + if !file.IsDir() { + dirPath = path.Dir(f.Name) } - if z.ContinueOnError { - log.Printf("[ERROR] Walking %s: %v", zf.Name, err) - continue - } - return fmt.Errorf("walking %s: %v", zf.Name, err) + skipDirs.add(dirPath) + } else if err != nil { + return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err) } } return nil } -// Extract extracts a single file from the zip archive. -// If the target is a directory, the entire folder will -// be extracted into destination. 
-func (z *Zip) Extract(source, target, destination string) error { - // target refers to a path inside the archive, which should be clean also - target = path.Clean(target) - - // if the target ends up being a directory, then - // we will continue walking and extracting files - // until we are no longer within that directory - var targetDirPath string - - return z.Walk(source, func(f File) error { - zfh, ok := f.Header.(zip.FileHeader) - if !ok { - return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header) - } - - // importantly, cleaning the path strips tailing slash, - // which must be appended to folders within the archive - name := path.Clean(zfh.Name) - if f.IsDir() && target == name { - targetDirPath = path.Dir(name) +// decodeText decodes the name and comment fields from hdr into UTF-8. +// It is a no-op if the text is already UTF-8 encoded or if z.TextEncoding +// is not specified. +func (z Zip) decodeText(hdr *zip.FileHeader) { + if hdr.NonUTF8 && z.TextEncoding != "" { + filename, err := decodeText(hdr.Name, z.TextEncoding) + if err == nil { + hdr.Name = filename } - - if within(target, zfh.Name) { - // either this is the exact file we want, or is - // in the directory we want to extract - - // build the filename we will extract to - end, err := filepath.Rel(targetDirPath, zfh.Name) - if err != nil { - return fmt.Errorf("relativizing paths: %v", err) + if hdr.Comment != "" { + comment, err := decodeText(hdr.Comment, z.TextEncoding) + if err == nil { + hdr.Comment = comment } - joined := filepath.Join(destination, end) - - err = z.extractFile(f, joined, &zfh) - if err != nil { - return fmt.Errorf("extracting file %s: %v", zfh.Name, err) - } - - // if our target was not a directory, stop walk - if targetDirPath == "" { - return ErrStopWalk - } - } else if targetDirPath != "" { - // finished walking the entire directory - return ErrStopWalk } + } +} - return nil - }) +type seekReaderAt interface { + io.ReaderAt + io.Seeker } -// Match 
returns true if the format of file matches this -// type's format. It should not affect reader position. -func (*Zip) Match(file io.ReadSeeker) (bool, error) { - currentPos, err := file.Seek(0, io.SeekCurrent) +func streamSizeBySeeking(s io.Seeker) (int64, error) { + currentPosition, err := s.Seek(0, io.SeekCurrent) if err != nil { - return false, err + return 0, fmt.Errorf("getting current offset: %w", err) } - _, err = file.Seek(0, 0) + maxPosition, err := s.Seek(0, io.SeekEnd) if err != nil { - return false, err - } - defer func() { - _, _ = file.Seek(currentPos, io.SeekStart) - }() - - buf := make([]byte, 4) - if n, err := file.Read(buf); err != nil || n < 4 { - return false, nil + return 0, fmt.Errorf("fast-forwarding to end: %w", err) } - return bytes.Equal(buf, []byte("PK\x03\x04")), nil -} - -func (z *Zip) String() string { return "zip" } - -// NewZip returns a new, default instance ready to be customized and used. -func NewZip() *Zip { - return &Zip{ - CompressionLevel: flate.DefaultCompression, - MkdirAll: true, - SelectiveCompression: true, - FileMethod: Deflate, + _, err = s.Seek(currentPosition, io.SeekStart) + if err != nil { + return 0, fmt.Errorf("returning to prior offset %d: %w", currentPosition, err) } + return maxPosition, nil } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Reader(new(Zip)) - _ = Writer(new(Zip)) - _ = Archiver(new(Zip)) - _ = Unarchiver(new(Zip)) - _ = Walker(new(Zip)) - _ = Extractor(new(Zip)) - _ = Matcher(new(Zip)) - _ = ExtensionChecker(new(Zip)) - _ = FilenameChecker(new(Zip)) +// Additional compression methods not offered by archive/zip. +// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.4.5. +const ( + ZipMethodBzip2 = 12 + // TODO: LZMA: Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 archives made this way - and vice versa. 
+ // ZipMethodLzma = 14 + ZipMethodZstd = 93 + ZipMethodXz = 95 ) // compressedFormats is a (non-exhaustive) set of lowercased // file extensions for formats that are typically already // compressed. Compressing files that are already compressed -// is inefficient, so use this set of extension to avoid that. +// is inefficient, so use this set of extensions to avoid that. var compressedFormats = map[string]struct{}{ ".7z": {}, ".avi": {}, @@ -707,5 +297,53 @@ var compressedFormats = map[string]struct{}{ ".zipx": {}, } -// DefaultZip is a default instance that is conveniently ready to use. -var DefaultZip = NewZip() +var encodings = map[string]encoding.Encoding{ + "ibm866": charmap.CodePage866, + "iso8859_2": charmap.ISO8859_2, + "iso8859_3": charmap.ISO8859_3, + "iso8859_4": charmap.ISO8859_4, + "iso8859_5": charmap.ISO8859_5, + "iso8859_6": charmap.ISO8859_6, + "iso8859_7": charmap.ISO8859_7, + "iso8859_8": charmap.ISO8859_8, + "iso8859_8I": charmap.ISO8859_8I, + "iso8859_10": charmap.ISO8859_10, + "iso8859_13": charmap.ISO8859_13, + "iso8859_14": charmap.ISO8859_14, + "iso8859_15": charmap.ISO8859_15, + "iso8859_16": charmap.ISO8859_16, + "koi8r": charmap.KOI8R, + "koi8u": charmap.KOI8U, + "macintosh": charmap.Macintosh, + "windows874": charmap.Windows874, + "windows1250": charmap.Windows1250, + "windows1251": charmap.Windows1251, + "windows1252": charmap.Windows1252, + "windows1253": charmap.Windows1253, + "windows1254": charmap.Windows1254, + "windows1255": charmap.Windows1255, + "windows1256": charmap.Windows1256, + "windows1257": charmap.Windows1257, + "windows1258": charmap.Windows1258, + "macintoshcyrillic": charmap.MacintoshCyrillic, + "gbk": simplifiedchinese.GBK, + "gb18030": simplifiedchinese.GB18030, + "big5": traditionalchinese.Big5, + "eucjp": japanese.EUCJP, + "iso2022jp": japanese.ISO2022JP, + "shiftjis": japanese.ShiftJIS, + "euckr": korean.EUCKR, + "utf16be": unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), + "utf16le": 
unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), +} + +// decodeText returns UTF-8 encoded text from the given charset. +// Thanks to @zxdvd for contributing non-UTF-8 encoding logic in #149. +func decodeText(input, charset string) (string, error) { + if enc, ok := encodings[charset]; ok { + return enc.NewDecoder().String(input) + } + return "", fmt.Errorf("unrecognized charset %s", charset) +} + +var zipHeader = []byte("PK\x03\x04") // TODO: headers of empty zip files might end with 0x05,0x06 or 0x06,0x06 instead of 0x03,0x04 diff --git a/zstd.go b/zstd.go index 60c11efc..cd310a31 100644 --- a/zstd.go +++ b/zstd.go @@ -1,61 +1,64 @@ package archiver import ( - "fmt" + "bytes" "io" - "path/filepath" + "strings" "github.com/klauspost/compress/zstd" ) +func init() { + RegisterFormat(Zstd{}) +} + // Zstd facilitates Zstandard compression. type Zstd struct { EncoderOptions []zstd.EOption DecoderOptions []zstd.DOption } -// Compress reads in, compresses it, and writes it to out. -func (zs *Zstd) Compress(in io.Reader, out io.Writer) error { - w, err := zstd.NewWriter(out, zs.EncoderOptions...) - if err != nil { - return err +func (Zstd) Name() string { return ".zst" } + +func (zs Zstd) Match(filename string, stream io.Reader) (MatchResult, error) { + var mr MatchResult + + // match filename + if strings.Contains(strings.ToLower(filename), zs.Name()) { + mr.ByName = true } - defer w.Close() - _, err = io.Copy(w, in) - return err -} -// Decompress reads in, decompresses it, and writes it to out. -func (zs *Zstd) Decompress(in io.Reader, out io.Writer) error { - r, err := zstd.NewReader(in, zs.DecoderOptions...) - if err != nil { - return err + // match file header + buf := make([]byte, len(zstdHeader)) + if _, err := io.ReadFull(stream, buf); err != nil { + return mr, err } - defer r.Close() - _, err = io.Copy(out, r) - return err + mr.ByStream = bytes.Equal(buf, zstdHeader) + + return mr, nil } -// CheckExt ensures the file extension matches the format. 
-func (zs *Zstd) CheckExt(filename string) error { - if filepath.Ext(filename) != ".zst" { - return fmt.Errorf("filename must have a .zst extension") - } - return nil +func (zs Zstd) OpenWriter(w io.Writer) (io.WriteCloser, error) { + return zstd.NewWriter(w, zs.EncoderOptions...) } -func (zs *Zstd) String() string { return "zstd" } +func (zs Zstd) OpenReader(r io.Reader) (io.ReadCloser, error) { + zr, err := zstd.NewReader(r, zs.DecoderOptions...) + if err != nil { + return nil, err + } + return errorCloser{zr}, nil +} -// NewZstd returns a new, default instance ready to be customized and used. -func NewZstd() *Zstd { - return new(Zstd) +type errorCloser struct { + *zstd.Decoder } -// Compile-time checks to ensure type implements desired interfaces. -var ( - _ = Compressor(new(Zstd)) - _ = Decompressor(new(Zstd)) -) +func (ec errorCloser) Close() error { + ec.Decoder.Close() + return nil +} -// DefaultZstd is a default instance that is conveniently ready to use. -var DefaultZstd = NewZstd() +// magic number at the beginning of Zstandard files +// https://github.com/facebook/zstd/blob/6211bfee5ec24dc825c11751c33aa31d618b5f10/doc/zstd_compression_format.md +var zstdHeader = []byte{0x28, 0xb5, 0x2f, 0xfd}