 	"io"
 	"log"
 	"os"
+	"path/filepath"
 	"runtime"
+	"strings"
 	"time"
 
 	"github.com/s0rg/crawley/pkg/crawler"
@@ -17,32 +19,30 @@ import (
 
 const (
 	appName      = "Crawley"
+	appHelp      = "the unix-way web crawler"
 	appSite      = "https://github.com/s0rg/crawley"
 	defaultDelay = 150 * time.Millisecond
 )
 
+// build-time values.
 var (
-	GitHash   string
 	GitTag    string
+	GitHash   string
 	BuildDate string
 	defaultUA = "Mozilla/5.0 (compatible; Win64; x64) Mr." + appName + "/" + GitTag + "-" + GitHash
+)
 
-	cookies, headers values.Smart
-	tags, ignored    values.Simple
-
-	fDepth        = flag.Int("depth", 0, "scan depth (set -1 for unlimited)")
-	fWorkers      = flag.Int("workers", runtime.NumCPU(), "number of workers")
-	fBrute        = flag.Bool("brute", false, "scan html comments")
-	fNoHeads      = flag.Bool("headless", false, "disable pre-flight HEAD requests")
-	fScanJS       = flag.Bool("js", false, "scan js files for endpoints")
-	fSkipSSL      = flag.Bool("skip-ssl", false, "skip ssl verification")
-	fSilent       = flag.Bool("silent", false, "suppress info and error messages in stderr")
-	fVersion      = flag.Bool("version", false, "show version")
-	fDirsPolicy   = flag.String("dirs", "show", "policy for non-resource urls: show / hide / only")
-	fProxyAuth    = flag.String("proxy-auth", "", "credentials for proxy: user:password")
-	fRobotsPolicy = flag.String("robots", "ignore", "policy for robots.txt: ignore / crawl / respect")
-	fUA           = flag.String("user-agent", defaultUA, "user-agent string")
-	fDelay        = flag.Duration("delay", defaultDelay, "per-request delay (0 - disable)")
+// command-line flags.
+var (
+	fDepth, fWorkers        int
+	fSilent, fVersion       bool
+	fBrute, fNoHeads        bool
+	fSkipSSL, fScanJS       bool
+	fDirsPolicy, fProxyAuth string
+	fRobotsPolicy, fUA      string
+	fDelay                  time.Duration
+	cookies, headers        values.Smart
+	tags, ignored           values.List
 )
 
 func version() string {
@@ -56,6 +56,29 @@ func version() string {
 	)
 }
 
+func usage() {
+	var sb strings.Builder
+
+	const twoCR = "\n\n"
+
+	sb.WriteString(appName)
+	sb.WriteString(" - ")
+	sb.WriteString(appHelp)
+	sb.WriteString(", usage:")
+	sb.WriteString(twoCR)
+
+	sb.WriteString(filepath.Base(os.Args[0]))
+	sb.WriteString(" [flags] url")
+	sb.WriteString(twoCR)
+
+	sb.WriteString("possible flags with default values:")
+	sb.WriteString(twoCR)
+
+	_, _ = os.Stderr.WriteString(sb.String())
+
+	flag.PrintDefaults()
+}
+
 func puts(s string) {
 	_, _ = os.Stdout.WriteString(s + "\n")
 }
@@ -102,14 +125,14 @@ func loadSmart() (h, c []string, err error) {
 }
 
 func initOptions() (rv []crawler.Option, err error) {
-	robots, err := crawler.ParseRobotsPolicy(*fRobotsPolicy)
+	robots, err := crawler.ParseRobotsPolicy(fRobotsPolicy)
 	if err != nil {
 		err = fmt.Errorf("robots policy: %w", err)
 
 		return
 	}
 
-	dirs, err := crawler.ParseDirsPolicy(*fDirsPolicy)
+	dirs, err := crawler.ParseDirsPolicy(fDirsPolicy)
 	if err != nil {
 		err = fmt.Errorf("dirs policy: %w", err)
 
@@ -124,58 +147,72 @@ func initOptions() (rv []crawler.Option, err error) {
 	}
 
 	rv = []crawler.Option{
-		crawler.WithUserAgent(*fUA),
-		crawler.WithDelay(*fDelay),
-		crawler.WithMaxCrawlDepth(*fDepth),
-		crawler.WithWorkersCount(*fWorkers),
-		crawler.WithSkipSSL(*fSkipSSL),
-		crawler.WithBruteMode(*fBrute),
+		crawler.WithUserAgent(fUA),
+		crawler.WithDelay(fDelay),
+		crawler.WithMaxCrawlDepth(fDepth),
+		crawler.WithWorkersCount(fWorkers),
+		crawler.WithSkipSSL(fSkipSSL),
+		crawler.WithBruteMode(fBrute),
 		crawler.WithDirsPolicy(dirs),
 		crawler.WithRobotsPolicy(robots),
-		crawler.WithoutHeads(*fNoHeads),
-		crawler.WithScanJS(*fScanJS),
+		crawler.WithoutHeads(fNoHeads),
+		crawler.WithScanJS(fScanJS),
 		crawler.WithExtraHeaders(h),
 		crawler.WithExtraCookies(c),
 		crawler.WithTagsFilter(tags.Values),
 		crawler.WithIgnored(ignored.Values),
-		crawler.WithProxyAuth(*fProxyAuth),
+		crawler.WithProxyAuth(fProxyAuth),
 	}
 
 	return rv, nil
 }
 
-func main() {
-	flag.Var(
-		&headers,
-		"header",
+func setupFlags() {
+	flag.Var(&headers, "header",
 		"extra headers for request, can be used multiple times, accept files with '@'-prefix",
 	)
-	flag.Var(
-		&cookies,
-		"cookie",
+	flag.Var(&cookies, "cookie",
 		"extra cookies for request, can be used multiple times, accept files with '@'-prefix",
 	)
-	flag.Var(
-		&tags,
-		"tag",
-		"tags filter, single or comma-separated tag names",
-	)
-	flag.Var(
-		&ignored,
-		"ignore",
-		"patterns (in urls) to be ignored in crawl process",
-	)
+
+	flag.Var(&tags, "tag", "tags filter, single or comma-separated tag names")
+	flag.Var(&ignored, "ignore", "patterns (in urls) to be ignored in crawl process")
+
+	flag.IntVar(&fDepth, "depth", 0, "scan depth (set -1 for unlimited)")
+	flag.IntVar(&fWorkers, "workers", runtime.NumCPU(), "number of workers")
+
+	flag.BoolVar(&fBrute, "brute", false, "scan html comments")
+	flag.BoolVar(&fNoHeads, "headless", false, "disable pre-flight HEAD requests")
+	flag.BoolVar(&fScanJS, "js", false, "scan js files for endpoints")
+	flag.BoolVar(&fSkipSSL, "skip-ssl", false, "skip ssl verification")
+	flag.BoolVar(&fSilent, "silent", false, "suppress info and error messages in stderr")
+	flag.BoolVar(&fVersion, "version", false, "show version")
+
+	flag.StringVar(&fDirsPolicy, "dirs", crawler.DefaultDirsPolicy,
+		"policy for non-resource urls: show / hide / only")
+	flag.StringVar(&fRobotsPolicy, "robots", crawler.DefaultRobotsPolicy,
+		"policy for robots.txt: ignore / crawl / respect")
+	flag.StringVar(&fUA, "user-agent", defaultUA, "user-agent string")
+	flag.StringVar(&fProxyAuth, "proxy-auth", "", "credentials for proxy: user:password")
+
+	flag.DurationVar(&fDelay, "delay", defaultDelay, "per-request delay (0 - disable)")
+
+	flag.Usage = usage
+}
+
+func main() {
+	setupFlags()
 
 	flag.Parse()
 
-	if *fVersion {
+	if fVersion {
 		puts(version())
 
 		return
 	}
 
 	if flag.NArg() != 1 {
-		flag.Usage()
+		usage()
 
 		return
 	}
@@ -185,7 +222,7 @@ func main() {
 		log.Fatal("[-] options:", err)
 	}
 
-	if *fSilent {
+	if fSilent {
 		log.SetOutput(io.Discard)
 	}
 
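For reference, below is a minimal, self-contained sketch of the pattern this commit moves to: flags are registered against package-level variables with flag.IntVar / flag.BoolVar inside a dedicated setup function, and flag.Usage points at a custom printer, so later code reads plain values instead of dereferencing the pointers that flag.Int / flag.Bool return. This is an illustration only, not the crawley source; the variable and flag names (depth, silent) are placeholders.

// flag_pattern_sketch.go — illustrative only, not part of this commit.
package main

import (
	"flag"
	"fmt"
	"os"
	"path/filepath"
)

// Package-level flag storage, mirroring the fDepth / fSilent style above.
var (
	depth  int
	silent bool
)

// usage prints a short banner followed by the registered flag defaults.
func usage() {
	fmt.Fprintf(os.Stderr, "%s [flags] url\n\npossible flags with default values:\n\n",
		filepath.Base(os.Args[0]))
	flag.PrintDefaults()
}

// setupFlags binds every flag to its variable and installs the custom usage.
func setupFlags() {
	flag.IntVar(&depth, "depth", 0, "scan depth (set -1 for unlimited)")
	flag.BoolVar(&silent, "silent", false, "suppress info and error messages in stderr")
	flag.Usage = usage
}

func main() {
	setupFlags()
	flag.Parse()

	// Values are used directly — no *depth / *silent dereference needed.
	fmt.Println("depth:", depth, "silent:", silent)
}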