This repository has been archived by the owner on Nov 19, 2024. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 391
/
7z.go
129 lines (106 loc) · 3.34 KB
/
7z.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
package archiver
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"log"
"path"
"strings"
"github.com/bodgit/sevenzip"
)
func init() {
RegisterFormat(SevenZip{})
// looks like the sevenzip package registers a lot of decompressors for us automatically:
// https://github.com/bodgit/sevenzip/blob/46c5197162c784318b98b9a3f80289a9aa1ca51a/register.go#L38-L61
}
// SevenZip describes the 7z archive format. Its fields configure how
// archives are read by Extract.
type SevenZip struct {
	// If true, an error returned while handling a file within
	// an archive during Extract is logged and the operation
	// continues with the remaining files instead of aborting.
	ContinueOnError bool

	// The password, if dealing with an encrypted archive. It is
	// handed to the sevenzip reader when the archive is opened.
	Password string
}
// Extension returns the file name extension used for 7z archives,
// including the leading dot.
func (SevenZip) Extension() string {
	const ext = ".7z"
	return ext
}
// Match reports whether the given file name and/or the leading bytes of
// stream look like a 7z archive. ByName is set when the lowercased file
// name contains the 7z extension; ByStream is set when the first bytes of
// stream equal the 7z signature. An error from reading the stream is
// returned together with whatever was matched so far.
func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
	var result MatchResult

	// Name check: case-insensitive search for the extension in the file name.
	lowered := strings.ToLower(filename)
	if strings.Contains(lowered, z.Extension()) {
		result.ByName = true
	}

	// Stream check: read only as many bytes as the signature is long.
	signatureLen := len(sevenZipHeader)
	head, readErr := readAtMost(stream, signatureLen)
	if readErr != nil {
		return result, readErr
	}

	result.ByStream = bytes.Equal(head, sevenZipHeader)
	return result, nil
}
// Archive is not implemented for 7z because I do not know of a pure-Go 7z writer.

// Extract extracts files from z, implementing the Extractor interface. Uniquely, however,
// sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces
// from io.Reader which is what the method signature requires. We chose this signature for
// the interface because we figure you can Read() from anything you can ReadAt() or Seek()
// with. Due to the nature of the 7z archive format, if sourceArchive is not an io.Seeker
// and io.ReaderAt, an error is returned.
//
// handleFile is called for each archive entry that does not match the skip list. If it
// returns fs.SkipDir, the entry's path (for a directory) or its parent folder path (for
// a file) is added to the skip list. Any other error aborts the extraction, unless
// z.ContinueOnError is set, in which case the error is logged and extraction continues
// with the next entry. If ctx reports an error, extraction stops and that error is
// returned.
func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
	sra, ok := sourceArchive.(seekReaderAt)
	if !ok {
		return errors.New("input type must be an io.ReaderAt and io.Seeker because of 7z format constraints")
	}

	// the 7z reader needs to know the total size of the archive up front
	size, err := streamSizeBySeeking(sra)
	if err != nil {
		return fmt.Errorf("determining stream size: %w", err)
	}

	zr, err := sevenzip.NewReaderWithPassword(sra, size, z.Password)
	if err != nil {
		return err
	}

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for i, f := range zr.File {
		f := f // make a copy for the Open closure
		if err := ctx.Err(); err != nil {
			return err // honor context cancellation
		}

		// the skip list is passed as the list to match against, so a
		// positive result means this entry was previously marked as skipped
		if fileIsIncluded(skipDirs, f.Name) {
			continue
		}

		fi := f.FileInfo()
		file := FileInfo{
			FileInfo:      fi,
			Header:        f.FileHeader,
			NameInArchive: f.Name,
			// the entry is only opened if and when the handler asks for it
			Open: func() (fs.File, error) {
				openedFile, err := f.Open()
				if err != nil {
					return nil, err
				}
				return fileInArchive{openedFile, fi}, nil
			},
		}

		err := handleFile(ctx, file)
		if errors.Is(err, fs.SkipDir) {
			// if a directory, skip this path; if a file, skip the folder path
			dirPath := f.Name
			if !file.IsDir() {
				dirPath = path.Dir(f.Name) + "/"
			}
			skipDirs.add(dirPath)
		} else if err != nil {
			if z.ContinueOnError {
				log.Printf("[ERROR] %s: %v", f.Name, err)
				continue
			}
			return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err)
		}
	}

	return nil
}
// sevenZipHeader is the 6-byte signature that Match compares against the
// start of a stream: the ASCII characters "7z" followed by 0xBC 0xAF 0x27 0x1C.
// https://py7zr.readthedocs.io/en/latest/archive_format.html#signature
var sevenZipHeader = []byte{'7', 'z', 0xBC, 0xAF, 0x27, 0x1C}
// Interface guard: a compile-time check that SevenZip satisfies Extractor.
// The blank identifier means no variable is kept at runtime.
var _ Extractor = SevenZip{}