Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions services/mcp-tools/internal/domain/search/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ type FetchWebpageRequest struct {
// FetchWebpageResponse is the result of a webpage scrape attempt.
// Status/Error support graceful degradation: a failed scrape is reported
// in the payload rather than as a transport-level error.
type FetchWebpageResponse struct {
Text string `json:"text"`
Metadata map[string]any `json:"metadata"`
Status string `json:"status,omitempty"` // "success", "partial", or "failed"
Error string `json:"error,omitempty"` // Error message if scrape failed
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type Config struct {

// HTTP Client Performance
SerperHTTPTimeout int `env:"SERPER_HTTP_TIMEOUT" envDefault:"15"`
SerperScrapeTimeout int `env:"SERPER_SCRAPE_TIMEOUT" envDefault:"30"` // Separate longer timeout for scrape operations
SerperMaxConnsPerHost int `env:"SERPER_MAX_CONNS_PER_HOST" envDefault:"50"`
SerperMaxIdleConns int `env:"SERPER_MAX_IDLE_CONNS" envDefault:"100"`
SerperIdleConnTimeout int `env:"SERPER_IDLE_CONN_TIMEOUT" envDefault:"90"`
Expand Down
63 changes: 58 additions & 5 deletions services/mcp-tools/internal/infrastructure/search/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type ClientConfig struct {

// HTTP Client Settings
HTTPTimeout time.Duration
ScrapeTimeout time.Duration // Separate timeout for scrape operations (typically longer)
MaxConnsPerHost int
MaxIdleConns int
IdleConnTimeout time.Duration
Expand All @@ -63,6 +64,7 @@ type ClientConfig struct {
type SearchClient struct {
cfg ClientConfig
serperClient *resty.Client
scrapeClient *resty.Client // Separate client for scrape with longer timeout
fallbackClient *resty.Client
searxClient *resty.Client
retryConfig RetryConfig
Expand Down Expand Up @@ -114,9 +116,26 @@ func NewSearchClient(cfg ClientConfig) *SearchClient {
SetRetryCount(0).
SetTransport(transport)

// Scrape client with longer timeout (default 30s if not configured)
scrapeTimeout := cfg.ScrapeTimeout
if scrapeTimeout == 0 {
scrapeTimeout = 30 * time.Second
}
scrapeHTTP := resty.New().
SetHeader("User-Agent", "Jan-MCP-Tools/1.0").
SetTimeout(scrapeTimeout).
SetRetryCount(0).
SetTransport(transport)

// Fallback client with browser-like headers to avoid basic bot detection
fallbackHTTP := resty.New().
SetHeader("User-Agent", "Jan-MCP-Tools-Fallback/1.0").
SetTimeout(10 * time.Second).
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36").
SetHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8").
SetHeader("Accept-Language", "en-US,en;q=0.5").
SetHeader("Accept-Encoding", "gzip, deflate").
SetHeader("Connection", "keep-alive").
SetHeader("Upgrade-Insecure-Requests", "1").
SetTimeout(15 * time.Second).
SetRetryCount(0)

searxHTTP := resty.New().
Expand Down Expand Up @@ -160,6 +179,7 @@ func NewSearchClient(cfg ClientConfig) *SearchClient {
return &SearchClient{
cfg: cfg,
serperClient: serperHTTP,
scrapeClient: scrapeHTTP,
fallbackClient: fallbackHTTP,
searxClient: searxHTTP,
retryConfig: retryConfig,
Expand Down Expand Up @@ -206,13 +226,46 @@ func (c *SearchClient) Search(ctx context.Context, query domainsearch.SearchRequ
}

// FetchWebpage scrapes a webpage either via Serper's scrape API or a fallback HTTP fetcher.
// Returns a response with status indicating success/failure - graceful degradation instead of errors.
func (c *SearchClient) FetchWebpage(ctx context.Context, query domainsearch.FetchWebpageRequest) (*domainsearch.FetchWebpageResponse, error) {
var serperErr, fallbackErr error

if c.hasAPIKey() {
if res, err := c.fetchViaSerper(ctx, query); err == nil {
res.Status = "success"
return res, nil
} else {
serperErr = err
}
}
return c.fetchFallback(ctx, query)

// Try fallback
if res, err := c.fetchFallback(ctx, query); err == nil {
res.Status = "success"
return res, nil
} else {
fallbackErr = err
}

// Both failed - return graceful degradation response
errMsg := "scrape failed"
if serperErr != nil && fallbackErr != nil {
errMsg = fmt.Sprintf("serper: %v; fallback: %v", serperErr, fallbackErr)
} else if fallbackErr != nil {
errMsg = fallbackErr.Error()
} else if serperErr != nil {
errMsg = serperErr.Error()
Comment on lines +251 to +257
Copy link

Copilot AI Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message construction logic is redundant and can be simplified. The else-if chain at lines 254-257 will never execute because if both errors are nil, the function would have already returned successfully. The logic can be simplified to just check if both errors exist and construct the message accordingly, without the unnecessary else-if branches.

Suggested change
errMsg := "scrape failed"
if serperErr != nil && fallbackErr != nil {
errMsg = fmt.Sprintf("serper: %v; fallback: %v", serperErr, fallbackErr)
} else if fallbackErr != nil {
errMsg = fallbackErr.Error()
} else if serperErr != nil {
errMsg = serperErr.Error()
// At this point, fallbackErr is guaranteed to be non-nil because a successful
// fallback would have returned earlier.
errMsg := fallbackErr.Error()
if serperErr != nil {
errMsg = fmt.Sprintf("serper: %v; fallback: %v", serperErr, fallbackErr)

Copilot uses AI. Check for mistakes.
}

return &domainsearch.FetchWebpageResponse{
Text: "",
Status: "failed",
Error: errMsg,
Metadata: map[string]any{
"url": query.Url,
"reason": "Unable to scrape content from this URL",
},
}, nil
}

func (c *SearchClient) enrichQuery(query domainsearch.SearchRequest) domainsearch.SearchRequest {
Expand Down Expand Up @@ -482,10 +535,10 @@ func (c *SearchClient) fetchViaSerper(ctx context.Context, query domainsearch.Fe
body["includeMarkdown"] = *query.IncludeMarkdown
}

// Retry with exponential backoff
// Retry with exponential backoff - use dedicated scrape client with longer timeout
result, err := WithRetry(ctx, c.retryConfig, "serper_scrape", func() (*domainsearch.FetchWebpageResponse, error) {
var res domainsearch.FetchWebpageResponse
resp, err := c.serperClient.R().
resp, err := c.scrapeClient.R().
SetContext(ctx).
SetHeader("X-API-KEY", c.cfg.SerperAPIKey).
SetHeader("Content-Type", "application/json").
Expand Down
42 changes: 29 additions & 13 deletions services/mcp-tools/internal/infrastructure/search/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ import (

// RetryConfig defines retry behavior for search operations
type RetryConfig struct {
MaxAttempts int
InitialDelay time.Duration
MaxDelay time.Duration
BackoffFactor float64
RetryableErrors []string
MaxAttempts int
InitialDelay time.Duration
MaxDelay time.Duration
BackoffFactor float64
RetryableErrors []string
NonRetryableErrors []string // Errors that should never be retried
}

// DefaultRetryConfig returns sensible defaults for retry behavior
Expand All @@ -36,6 +37,12 @@ func DefaultRetryConfig() RetryConfig {
"503", // Service unavailable
"504", // Gateway timeout
},
NonRetryableErrors: []string{
"403", // Forbidden - anti-bot protection, won't succeed on retry
"401", // Unauthorized - auth issue, won't succeed on retry
"404", // Not found - page doesn't exist
"410", // Gone - resource permanently removed
},
}
}

Expand All @@ -59,9 +66,9 @@ func WithRetry[T any](ctx context.Context, cfg RetryConfig, operation string, fn
}

lastErr = err

// Check if error is retryable
if !isRetryable(err, cfg.RetryableErrors) {
if !isRetryable(err, cfg) {
log.Debug().
Err(err).
Str("operation", operation).
Expand Down Expand Up @@ -113,17 +120,26 @@ func calculateBackoff(attempt int, initial, max time.Duration, factor float64) t
}

// isRetryable checks if an error should trigger a retry
func isRetryable(err error, retryableErrors []string) bool {
func isRetryable(err error, cfg RetryConfig) bool {
if err == nil {
return false
}

errStr := err.Error()
for _, pattern := range retryableErrors {
if strings.Contains(strings.ToLower(errStr), strings.ToLower(pattern)) {

errStr := strings.ToLower(err.Error())

// Check non-retryable errors first - these should never be retried
for _, pattern := range cfg.NonRetryableErrors {
if strings.Contains(errStr, strings.ToLower(pattern)) {
return false
}
}

// Check if error matches known retryable patterns
for _, pattern := range cfg.RetryableErrors {
if strings.Contains(errStr, strings.ToLower(pattern)) {
return true
}
}

return false
}
1 change: 1 addition & 0 deletions services/mcp-tools/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func main() {
CBTimeout: time.Duration(cfg.SerperCBTimeout) * time.Second,
CBMaxHalfOpen: cfg.SerperCBMaxHalfOpen,
HTTPTimeout: time.Duration(cfg.SerperHTTPTimeout) * time.Second,
ScrapeTimeout: time.Duration(cfg.SerperScrapeTimeout) * time.Second,
MaxConnsPerHost: cfg.SerperMaxConnsPerHost,
MaxIdleConns: cfg.SerperMaxIdleConns,
IdleConnTimeout: time.Duration(cfg.SerperIdleConnTimeout) * time.Second,
Expand Down
Loading