Skip to content

Commit

Permalink
Merge pull request #162 from buger/input-modifier
Browse files Browse the repository at this point in the history
Middleware for custom request rewrite logic
  • Loading branch information
buger committed Aug 24, 2015
2 parents 5e526af + abde6ec commit 1fea397
Show file tree
Hide file tree
Showing 36 changed files with 1,431 additions and 237 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
*.out

*.bin

*.gz

*.class
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
language: go
go: 1.4.2
script: sudo -E bash -c "source /etc/profile && eval '$(gimme 1.4.2)' && export GOPATH=$HOME/gopath:$GOPATH && go get && GORACE='halt_on_error=1' go test ./... -v -timeout 15s"
script: sudo -E bash -c "source /etc/profile && eval '$(gimme 1.4.2)' && export GOPATH=$HOME/gopath:$GOPATH && go get && GORACE='halt_on_error=1' go test ./... -v -timeout 60s -race"
16 changes: 15 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,23 @@ FROM google/golang:1.4

RUN cd /goroot/src/ && GOOS=linux GOARCH=386 ./make.bash --no-clean

WORKDIR /gopath/src/github.com/buger/gor/
RUN apt-get update && apt-get install ruby vim-common -y

# Install Java for middleware testing
RUN echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee /etc/apt/sources.list.d/webupd8team-java.list
RUN echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee -a /etc/apt/sources.list.d/webupd8team-java.list
RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886
RUN apt-get update -y
RUN echo oracle-java7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN apt-get install oracle-java8-installer -y

RUN wget http://apache-mirror.rbc.ru/pub/apache//commons/io/binaries/commons-io-2.4-bin.tar.gz -P /tmp
RUN tar xzf /tmp/commons-io-2.4-bin.tar.gz -C /tmp

WORKDIR /gopath/src/github.com/buger/gor/
ADD . /gopath/src/github.com/buger/gor/

RUN javac -cp /tmp/commons-io-2.4/commons-io-2.4.jar ./examples/middleware/echo.java

RUN go get -u github.com/golang/lint/golint
RUN go get
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
SOURCE = emitter.go gor.go gor_stat.go input_dummy.go input_file.go input_raw.go input_tcp.go limiter.go output_dummy.go output_file.go input_http.go output_http.go output_tcp.go plugins.go settings.go test_input.go elasticsearch.go http_modifier.go http_modifier_settings.go http_client.go
SOURCE = emitter.go gor.go gor_stat.go input_dummy.go input_file.go input_raw.go input_tcp.go limiter.go output_dummy.go output_file.go input_http.go output_http.go output_tcp.go plugins.go settings.go test_input.go elasticsearch.go http_modifier.go http_modifier_settings.go http_client.go middleware.go protocol.go

SOURCE_PATH = /gopath/src/github.com/buger/gor/

Expand All @@ -20,7 +20,7 @@ drace:
docker run -v `pwd`:$(SOURCE_PATH) -t -i --env GORACE="halt_on_error=1" gor go test ./... $(ARGS) -v -race -timeout 15s

dtest:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go test ./... $(ARGS) -v -timeout 10s
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go test ./... $(ARGS) -v -timeout 60s

dcover:
docker run -v `pwd`:$(SOURCE_PATH) -t -i --env GORACE="halt_on_error=1" gor go test $(ARGS) -race -v -timeout 15s -coverprofile=coverage.out
Expand All @@ -37,13 +37,16 @@ dbench:

# Used mainly for debugging, because the docker container does not have access to the host machine's ports
drun:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-dummy=0 --output-http="http://localhost:9000" --verbose
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-dummy=0 --output-http="http://localhost:9000" --input-raw :9000 --input-http :9000 --verbose --debug --middleware "./examples/middleware/echo.sh"

drun-2:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-file="./fixtures/requests.gor" --output-dummy=0 --verbose --debug --middleware "java -cp ./examples/middleware echo"

drecord:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-dummy=0 --output-file=requests.bin --verbose
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-dummy=0 --output-file=requests.gor --verbose --debug

dreplay:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor go run $(SOURCE) --input-file=requests.bin --output-tcp=:9000 --verbose -h

dbash:
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor /bin/bash
docker run -v `pwd`:$(SOURCE_PATH) -t -i gor /bin/bash
72 changes: 70 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ gor --input-tcp :28020 --output-http "http://staging.com" --output-http "http:/
```

### HTTP output workers
By default Gor creates dynamic pull of workers: it starts with 10 and create more http output workers when the http output queue length is greater than 10. The number of workers created (N) is equal to the queue length at the time which it is checked and found to have a length greater than 10. The queue length is checked every time a message is written to the http output queue. No more workers will be spawned until that request to spawn N workers is satisfied. If a dynamic worker cannot process a message at that time, it will sleep for 100 milliseconds. If a dynamic worker cannot process a message for 2 seconds it dies.
By default Gor creates a dynamic pool of workers: it starts with 10 and creates more HTTP output workers when the HTTP output queue length is greater than 10. The number of workers created (N) is equal to the queue length at the time it is checked and found to be greater than 10. The queue length is checked every time a message is written to the HTTP output queue. No more workers will be spawned until that request to spawn N workers is satisfied. If a dynamic worker cannot process a message at that time, it will sleep for 100 milliseconds. If a dynamic worker cannot process a message for 2 seconds, it dies.
You may specify fixed number of workers using `--output-http-workers=20` option.

### Follow redirects
Expand Down Expand Up @@ -149,7 +149,7 @@ gor --input-raw :80 --output-http "http://staging.server" \
```

### Rewriting original request
Gor supports built-in basic rewriting support, for complex logic see https://github.com/buger/gor/pull/162
Gor has basic built-in support for request rewriting. For complex logic you can use middleware, see below.

#### Rewrite URL based on a mapping
```
Expand Down Expand Up @@ -177,6 +177,74 @@ Host header gets special treatment. By default Host get set to the value specifi

If your app accepts traffic from multiple domains and you want to keep the original headers, there is a specific `--http-original-host` option which tells Gor not to touch the Host header at all.

### Middleware
Middleware is a program that accepts request and response payloads on STDIN and emits modified requests on STDOUT. You can implement any custom logic, such as stripping private data, advanced rewriting, OAuth support, etc.

```
Original request +--------------+
+-------------+----------STDIN---------->+ |
| Gor input | | Middleware |
+-------------+----------STDIN---------->+ |
Original response +------+---+---+
| ^
+-------------+ Modified request v |
| Gor output +<---------STDOUT-----------------+ |
+-----+-------+ |
| |
| Replayed response |
+------------------STDIN----------------->----+
```

Middleware can be written in any language, see `examples/middleware` folder for examples.
A middleware program must accept the fact that all communication with Gor is asynchronous: there is no guarantee that the original request and response messages will arrive one after another. Your app should keep track of state itself if its logic depends on the original or replayed response; see `examples/middleware/token_modifier.go` for an example.

Simple bash echo middleware (returns same request) will look like this:
```bash
while read line; do
echo $line
done
```
Middleware can be enabled using `--middleware` option, by specifying path to executable file:
```
gor --input-raw :80 --middleware "/opt/middleware_executable" --output-http "http://staging.server"
```
#### Communication protocol
All messages should be hex encoded. A new line character specifies the end of a message, i.e. one message per line.
The decoded payload consists of 2 parts: header and HTTP payload, separated by a new line character.
Example request payload:
```
1 932079936fa4306fc308d67588178d17d823647c 1439818823587396305
GET /a HTTP/1.1
Host: 127.0.0.1

```
Example response payload:
```
2 8e091765ae902fef8a2b7d9dd960e9d52222bd8c 2782013
HTTP/1.1 200 OK
Date: Mon, 17 Aug 2015 13:40:23 GMT
Content-Length: 0
Content-Type: text/plain; charset=utf-8

```
Header contains request meta information separated by spaces. First value is payload type, possible values: `1` - request, `2` - original response, `3` - replayed response.
Next goes the request id: it is unique among all requests (sha1 of time and Ack), but remains the same for the original and replayed responses, so you can create associations between requests and responses. The third argument varies depending on payload type: for requests it is the start time, for responses it is the round-trip time.
The HTTP payload is the unmodified HTTP request/response intercepted from the network. You can read more about the request format [here](http://www.jmarshall.com/easy/http/), [here](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol) and [here](http://www.w3.org/Protocols/rfc2616/rfc2616.html). You can operate on the payload as you want: add headers, change the path, etc. Basically you are just editing a string; just ensure that the result is RFC compliant.
At the end, the modified (or untouched) request should be emitted back to STDOUT, keeping the original header, and hex-encoded. If you want to filter out a request, just do not send it. Emitting responses back is required, even if you did not touch them.
#### Advanced example
Imagine that you have an auth system that randomly generates access tokens, which are used later for accessing secure content. Since there is no pre-defined token value, a naive approach without middleware (or with middleware that uses only request payloads) will fail, because the replayed server has its own tokens, not synced with the origin. To fix this, our middleware should take into account the responses of the replayed and origin servers, store `originalToken -> replayedToken` aliases, and rewrite all requests using the original token to use the replayed alias. See `examples/middleware/token_modifier.go` and `middleware_test.go#TestTokenMiddleware` for an example of the described scheme.
### Saving requests to file and replaying them
You can save requests to file, and replay them later:
```
Expand Down
53 changes: 44 additions & 9 deletions emitter.go
Original file line number Diff line number Diff line change
@@ -1,21 +1,39 @@
package main

import (
"bytes"
"io"
"time"
)

// Start initializes the loop for sending data from inputs to outputs
func Start(stop chan int) {
for _, in := range Plugins.Inputs {
go CopyMulty(in, Plugins.Outputs...)
if Settings.middleware != "" {
middleware := NewMiddleware(Settings.middleware)

for _, in := range Plugins.Inputs {
middleware.ReadFrom(in)
}

// We are only going to read responses, so we use the same ReadFrom method
for _, out := range Plugins.Outputs {
if r, ok := out.(io.Reader); ok {
middleware.ReadFrom(r)
}
}

go CopyMulty(middleware, Plugins.Outputs...)
} else {
for _, in := range Plugins.Inputs {
go CopyMulty(in, Plugins.Outputs...)
}
}

for {
select {
case <-stop:
return
case <-time.After(1 * time.Second):
case <-time.After(time.Second):
}
}
}
Expand All @@ -28,20 +46,37 @@ func CopyMulty(src io.Reader, writers ...io.Writer) (err error) {

for {
nr, er := src.Read(buf)

if nr > 0 && len(buf) > nr {
payload := buf[0:nr]

if modifier != nil {
payload = modifier.Rewrite(payload)
_maxN := nr
if nr > 500 {
_maxN = 500
}

if Settings.debug {
Debug("[EMITTER] input:", string(payload[0:_maxN]))
}

if modifier != nil && isRequestPayload(payload) {
headSize := bytes.IndexByte(payload, '\n') + 1
body := payload[headSize:]
originalBodyLen := len(body)
body = modifier.Rewrite(body)

// If modifier tells to skip request
if len(payload) == 0 {
if len(body) == 0 {
continue
}
}

if Settings.debug {
Debug("[EMITTER] Sending payload, size:", len(payload), "First 500 bytes:", string(payload[0:500]))
if originalBodyLen != len(body) {
payload = append(payload[:headSize], body...)
}

if Settings.debug {
Debug("[EMITTER] Rewrittern input:", len(payload), "First 500 bytes:", string(payload[0:_maxN]))
}
}

if Settings.splitOutput {
Expand Down
30 changes: 30 additions & 0 deletions examples/middleware/echo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.commons.io.IOUtils;

/**
 * Minimal Gor middleware example: echoes every line received on STDIN back
 * to STDOUT unchanged.
 */
public class echo {
    /**
     * Copies STDIN to STDOUT line by line until end of input.
     *
     * @param args command-line arguments; they are only logged, to STDERR
     */
    public static void main(String[] args) {
        // STDOUT carries the data stream, so diagnostics such as the
        // argument dump must go to STDERR instead of being mixed into it.
        if (args != null) {
            for (String arg : args) {
                System.err.println(arg);
            }
        }

        // try-with-resources closes the reader on every exit path, not only
        // when reading fails.
        try (BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            while ((line = stdin.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            // Report the failure instead of silently swallowing it.
            System.err.println("[DEBUG][ECHO] Failed to read from STDIN: " + e.getMessage());
        }
    }
}
15 changes: 15 additions & 0 deletions examples/middleware/echo.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env ruby
# encoding: utf-8
#
# Minimal middleware example: reads one hex-encoded message per line from
# STDIN, decodes and re-encodes it, and writes it back to STDOUT.
# Debug output goes to STDERR so it never mixes with the data stream.

# Ruby buffers STDOUT when it is not a terminal (e.g. a pipe); without this
# each echoed message would sit in the buffer instead of being written
# out immediately.
STDOUT.sync = true

while data = STDIN.gets
  data = data.chomp

  # Hex string -> raw bytes -> hex string again.
  decoded = [data].pack("H*")
  encoded = decoded.unpack("H*").first

  STDOUT.puts encoded

  STDERR.puts "[DEBUG][MIDDLEWARE] Original data: #{data}"
  STDERR.puts "[DEBUG][MIDDLEWARE] Decoded request: #{decoded}"
  STDERR.puts "[DEBUG][MIDDLEWARE] Encoded data: #{encoded}"
end
42 changes: 42 additions & 0 deletions examples/middleware/echo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# The `xxd` utility is included in the vim-common package.
# It allows hex decoding/encoding.

# Print a debug message (first argument) prefixed with "[DEBUG][ECHO]".
# The message is sent to stderr, since stdout/stdin are reserved for
# data transfer.
log() {
    echo "[DEBUG][ECHO] $1" 1>&2
}

# Main echo loop: read one hex-encoded message per line from stdin, decode
# it, log what was seen to stderr, and write the re-encoded message to stdout.
#
# `read -r` and `printf '%s'` keep backslashes literal. The previous
# `read` / `echo -e` combination interpreted backslash sequences, so a decoded
# payload containing e.g. a literal "\n" was altered before being re-encoded.
#
# NOTE: command substitution still strips trailing newlines and cannot hold
# NUL bytes, so this example is not a byte-exact echo for every payload.
while IFS= read -r line; do
    decoded=$(printf '%s\n' "$line" | xxd -r -p)

    # First line of the decoded message is the header, the rest is the payload.
    header=$(printf '%s\n' "$decoded" | head -n 1)
    payload=$(printf '%s\n' "$decoded" | tail -n +2)

    # Re-join header and payload and hex-encode them onto a single line.
    encoded=$(printf '%s\n%s\n' "$header" "$payload" | xxd -p | tr -d '\n')

    log ""
    log "==================================="

    # The first character of the header selects the payload type.
    case ${header:0:1} in
        "1")
            log "Request type: Request"
            ;;
        "2")
            log "Request type: Original Response"
            ;;
        "3")
            log "Request type: Replayed Response"
            ;;
        *)
            log "Unknown request type $header"
            ;;
    esac

    # Only the encoded message goes to stdout; everything else is logged.
    echo "$encoded"

    log "==================================="

    log "Original data: $line"
    log "Decoded request: $decoded"
    log "Encoded data: $encoded"
done;
Loading

0 comments on commit 1fea397

Please sign in to comment.