Skip to content

Commit

Permalink
Add custom check endpoint
Browse files Browse the repository at this point in the history
Replace the previously hardcoded /health path used to check when the server is ready to serve traffic. With this, llama-swap can support any upstream server that provides an OpenAI-compatible inference endpoint.
  • Loading branch information
mostlygeek authored Oct 12, 2024
2 parents 5a57688 + 8eb5b7b commit 6cf0962
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 11 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ models:
- "gpt-4o-mini"
- "gpt-3.5-turbo"

# wait for this path to return an HTTP 200 before serving requests
# defaults to /health to match llama.cpp
#
# use "none" to skip endpoint checking. This may cause requests to fail
# until the server is ready
checkEndpoint: "/custom-endpoint"

"qwen":
# environment variables to pass to the command
env:
Expand Down
8 changes: 8 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ models:
# list of model name aliases this llama.cpp instance can serve
aliases:
- "gpt-4o-mini"

# check this path for an HTTP 200 response to determine when the server is ready
checkEndpoint: "/health"

"qwen":
cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
proxy: "http://127.0.0.1:8999"
Expand All @@ -24,6 +28,10 @@ models:
cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999"

# use "none" to skip the check. Caution: this may cause some requests to fail
# until the upstream server is ready for traffic
checkEndpoint: "none"

# don't use this, just for testing if things are broken
"broken":
cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
Expand Down
2 changes: 1 addition & 1 deletion llama-swap.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func main() {
proxyManager := proxy.New(config)
http.HandleFunc("/", proxyManager.HandleFunc)

fmt.Println("llamagate listening on " + *listenStr)
fmt.Println("llama-swap listening on " + *listenStr)
if err := http.ListenAndServe(*listenStr, nil); err != nil {
fmt.Printf("Error starting server: %v\n", err)
os.Exit(1)
Expand Down
9 changes: 5 additions & 4 deletions proxy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ import (
)

type ModelConfig struct {
Cmd string `yaml:"cmd"`
Proxy string `yaml:"proxy"`
Aliases []string `yaml:"aliases"`
Env []string `yaml:"env"`
Cmd string `yaml:"cmd"`
Proxy string `yaml:"proxy"`
Aliases []string `yaml:"aliases"`
Env []string `yaml:"env"`
CheckEndpoint string `yaml:"checkEndpoint"`
}

type Config struct {
Expand Down
25 changes: 19 additions & 6 deletions proxy/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"os"
"os/exec"
"strings"
Expand Down Expand Up @@ -89,11 +90,23 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return fmt.Errorf("no upstream available to check /health")
}

proxyTo := pm.currentConfig.Proxy
checkEndpoint := strings.TrimSpace(pm.currentConfig.CheckEndpoint)

maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
if checkEndpoint == "none" {
return nil
}

// keep default behaviour
if checkEndpoint == "" {
checkEndpoint = "/health"
}

healthURL := proxyTo + "/health"
proxyTo := pm.currentConfig.Proxy
maxDuration := time.Second * time.Duration(pm.config.HealthCheckTimeout)
healthURL, err := url.JoinPath(proxyTo, checkEndpoint)
if err != nil {
return fmt.Errorf("failed to create health url with with %s and path %s", proxyTo, checkEndpoint)
}
client := &http.Client{}
startTime := time.Now()

Expand All @@ -112,12 +125,12 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
// if TCP dial can't connect any HTTP response after 5 seconds
// exit quickly.
if time.Since(startTime) > 5*time.Second {
return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
return fmt.Errorf("health check endpoint took more than 5 seconds to respond")
}
}

if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
return fmt.Errorf("failed to check health from: %s", healthURL)
}
time.Sleep(time.Second)
continue
Expand All @@ -127,7 +140,7 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
return nil
}
if time.Since(startTime) >= maxDuration {
return fmt.Errorf("failed to check /healthy from: %s", healthURL)
return fmt.Errorf("failed to check health from: %s", healthURL)
}
time.Sleep(time.Second)
}
Expand Down

0 comments on commit 6cf0962

Please sign in to comment.