tidyverse · hadley · Nov 4, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -6,7 +6,7 @@
       "Bash(R:*)",
       "Bash(rm:*)",
       "Bash(air format:*)",
-      "Edit(/**)",
+      "Edit(**)"
     ],
     "deny": []
   }

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -93,17 +93,20 @@ Collate:
     'provider-aws.R'
     'provider-openai-compatible.R'
     'provider-azure.R'
+    'provider-claude-tools.R'
     'provider-claude.R'
     'provider-google.R'
     'provider-cloudflare.R'
     'provider-databricks.R'
     'provider-deepseek.R'
     'provider-github.R'
+    'provider-google-tools.R'
     'provider-google-upload.R'
     'provider-groq.R'
     'provider-huggingface.R'
     'provider-mistral.R'
     'provider-ollama.R'
+    'provider-openai-tools.R'
     'provider-openai.R'
     'provider-openrouter.R'
     'provider-perplexity.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -60,6 +60,8 @@ export(claude_file_download)
 export(claude_file_get)
 export(claude_file_list)
 export(claude_file_upload)
+export(claude_tool_web_fetch)
+export(claude_tool_web_search)
 export(content_image_file)
 export(content_image_plot)
 export(content_image_url)
@@ -72,6 +74,8 @@ export(contents_replay)
 export(contents_text)
 export(create_tool_def)
 export(df_schema)
+export(google_tool_web_fetch)
+export(google_tool_web_search)
 export(google_upload)
 export(has_credentials)
 export(interpolate)
@@ -90,6 +94,7 @@ export(models_ollama)
 export(models_openai)
 export(models_portkey)
 export(models_vllm)
+export(openai_tool_web_search)
 export(parallel_chat)
 export(parallel_chat_structured)
 export(parallel_chat_text)

diff --git a/NEWS.md b/NEWS.md
@@ -32,6 +32,11 @@
 * `parallel_chat_structured()` no longer errors if some results fail to parse. Instead it warns, and the corresponding rows will be filled in with the appropriate missing values (#628).
 * `parallel_chat_structured()` now returns a tibble, since this does a better job of printing more complex data frames (#787).
 * `params()` gains new `reasoning_effort` and `reasoning_tokens` so you can control the amount of effort a model spends on thinking. Initial support is provided for `chat_claude()`, `chat_google_gemini()`, and `chat_openai()` (#720).
+* ellmer now supports a variety of built-in web search and fetch tools (#578):
+  - `claude_tool_web_search()` and `claude_tool_web_fetch()` for Claude.
+  - `google_tool_web_search()` and `google_tool_web_fetch()` for Gemini.
+  - `openai_tool_web_search()` for OpenAI.
+  If you want to do web fetch for other providers, you could use `btw::btw_tool_web_read_url()`.
+* `AssistantTurn`s now have a `@duration` slot, containing the total time to complete the request (@simonpcouch, #798).
 * The following deprecated functions/arguments/methods have now been removed:
   * `Chat$extract_data()` -> `chat$chat_structured()` (0.2.0)
   * `Chat$extract_data_async()` -> `chat$chat_structured_async()` (0.2.0)

diff --git a/R/provider-claude-tools.R b/R/provider-claude-tools.R
@@ -0,0 +1,100 @@
+#' Claude web search tool
+#'
+#' @description
+#' Enables Claude to search the web for up-to-date information. Your organization
+#' administrator must enable web search in the Anthropic Console before using
+#' this tool, as it costs extra ($10 per 1,000 searches at time of writing).
+#'
+#' Learn more in <https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool>.
+#'
+#' @param max_uses Integer. Maximum number of searches allowed per request.
+#' @param allowed_domains Character vector. Restrict searches to specific domains
+#'   (e.g., `c("nytimes.com", "bbc.com")`). Cannot be used with `blocked_domains`.
+#' @param blocked_domains Character vector. Exclude specific domains from searches.
+#'   Cannot be used with `allowed_domains`.
+#' @param user_location List with optional elements: `country` (2-letter code),
+#'   `city`, `region`, and `timezone` (IANA timezone) to localize search results.
+#'   If the list has no `type` element, `type = "approximate"` is added for you.
+#'
+#' @return A `ToolBuiltIn` object, suitable for `chat$register_tool()`.
+#'
+#' @family built-in tools
+#' @export
+#' @examples
+#' \dontrun{
+#' chat <- chat_claude()
+#' chat$register_tool(claude_tool_web_search())
+#' chat$chat("What was in the news today?")
+#' chat$chat("What's the biggest news in the economy?")
+#' }
+claude_tool_web_search <- function(
+  max_uses = NULL,
+  allowed_domains = NULL,
+  blocked_domains = NULL,
+  user_location = NULL
+) {
+  check_exclusive(allowed_domains, blocked_domains, .require = FALSE)
+
+  check_number_whole(max_uses, allow_null = TRUE, min = 1)
+  check_character(allowed_domains, allow_null = TRUE)
+  check_character(blocked_domains, allow_null = TRUE)
+
+  # Anthropic documents `user_location` as an object carrying
+  # `type = "approximate"`; supply it unless the caller already did so that
+  # a list holding only country/city/region/timezone is accepted.
+  if (is.list(user_location) && is.null(user_location[["type"]])) {
+    user_location <- c(list(type = "approximate"), user_location)
+  }
+
+  # compact() is applied so that arguments left as NULL are not sent.
+  json <- compact(list(
+    name = "web_search",
+    type = "web_search_20250305",
+    max_uses = max_uses,
+    allowed_domains = allowed_domains,
+    blocked_domains = blocked_domains,
+    user_location = user_location
+  ))
+  ToolBuiltIn("web_search", json = json)
+}
+
+#' Claude web fetch tool
+#'
+#' @description
+#' Enables Claude to fetch and analyze content from web URLs. Claude can only
+#' fetch URLs that appear in the conversation context (user messages or
+#' previous tool results). For security reasons, Claude cannot dynamically
+#' construct URLs to fetch.
+#'
+#' Requires the `web-fetch-2025-09-10` beta header.
+#' Learn more in <https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-fetch-tool>.
+#'
+#' @param max_uses Integer. Maximum number of fetches allowed per request.
+#' @param allowed_domains Character vector. Restrict fetches to specific domains.
+#'   Cannot be used with `blocked_domains`.
+#' @param blocked_domains Character vector. Exclude specific domains from fetches.
+#'   Cannot be used with `allowed_domains`.
+#' @param citations Logical. Whether to include citations in the response.
+#'   Default is `FALSE`.
+#' @param max_content_tokens Integer. Maximum number of tokens to fetch from each URL.
+#'
+#' @return A `ToolBuiltIn` object, suitable for `chat$register_tool()`.
+#'
+#' @family built-in tools
+#' @export
+#' @examples
+#' \dontrun{
+#' chat <- chat_claude(beta_headers = "web-fetch-2025-09-10")
+#' chat$register_tool(claude_tool_web_fetch())
+#' chat$chat("What are the latest package releases on https://tidyverse.org/blog")
+#' }
+claude_tool_web_fetch <- function(
+  max_uses = NULL,
+  allowed_domains = NULL,
+  blocked_domains = NULL,
+  citations = FALSE,
+  max_content_tokens = NULL
+) {
+  check_exclusive(allowed_domains, blocked_domains, .require = FALSE)
+
+  # Validate the numeric limits the same way claude_tool_web_search() does,
+  # so a bad value fails here rather than in the API request.
+  check_number_whole(max_uses, allow_null = TRUE, min = 1)
+  check_character(allowed_domains, allow_null = TRUE)
+  check_character(blocked_domains, allow_null = TRUE)
+  check_bool(citations)
+  check_number_whole(max_content_tokens, allow_null = TRUE, min = 1)
+
+  # compact() is applied so that arguments left as NULL are not sent;
+  # `citations` is always sent, wrapped as `list(enabled = <flag>)`.
+  json <- compact(list(
+    name = "web_fetch",
+    type = "web_fetch_20250910",
+    max_uses = max_uses,
+    allowed_domains = allowed_domains,
+    blocked_domains = blocked_domains,
+    citations = list(enabled = citations),
+    max_content_tokens = max_content_tokens
+  ))
+  ToolBuiltIn("web_fetch", json = json)
+}
diff --git a/R/provider-claude.R b/R/provider-claude.R
@@ -313,6 +313,12 @@ method(stream_merge_chunks, ProviderAnthropic) <- function(
     result$stop_reason <- chunk$delta$stop_reason
     result$stop_sequence <- chunk$delta$stop_sequence
     result$usage$output_tokens <- chunk$usage$output_tokens
+  } else if (chunk$delta$type == "citations_delta") {
+    # https://docs.claude.com/en/docs/build-with-claude/citations#streaming-support
+    result$content[[i]]$citations <- c(
+      result$content[[i]]$citations,
+      list(chunk$delta$citation)
+    )
   } else if (chunk$type == "error") {
     if (chunk$error$type == "overloaded_error") {
       # https://docs.anthropic.com/en/api/messages-streaming#error-events
@@ -355,6 +361,30 @@ method(value_turn, ProviderAnthropic) <- function(
         }
         ContentToolRequest(content$id, content$name, content$input)
       }
+    } else if (content$type == "server_tool_use") {
+      if (content$name == "web_search") {
+        # https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-search-tool#response
+        ContentToolRequestSearch(
+          query = content$input$query,
+          json = content
+        )
+      } else if (content$name == "web_fetch") {
+        # https://docs.claude.com/en/docs/agents-and-tools/tool-use/web-fetch-tool#response
+        ContentToolRequestFetch(
+          url = content$input$url,
+          json = content
+        )
+      } else {
+        cli::cli_abort("Unknown server tool {.str {content$name}}.")
+      }
+    } else if (content$type == "web_search_tool_result") {
+      urls <- map_chr(content$content, \(x) x$url)
+      ContentToolResponseSearch(
+        url = urls,
+        json = content
+      )
+    } else if (content$type == "web_fetch_tool_result") {
+      ContentToolResponseFetch(url = content$url %||% "failed", json = content)
     } else if (content$type == "thinking") {
       ContentThinking(
         content$thinking,

diff --git a/R/provider-google-tools.R b/R/provider-google-tools.R
@@ -0,0 +1,43 @@
+#' Google web search (grounding) tool
+#'
+#' @description
+#' Lets Gemini models look up current information on the web and ground their
+#' answers with citations to the sources used. Whether and how to search is
+#' decided automatically by the model from your prompt. The response carries
+#' the search results along with grounding metadata, including source URLs
+#' and titles.
+#'
+#' Learn more in <https://ai.google.dev/gemini-api/docs/google-search>.
+#'
+#' @family built-in tools
+#' @export
+#' @examples
+#' \dontrun{
+#' chat <- chat_google_gemini()
+#' chat$register_tool(google_tool_web_search())
+#' chat$chat("What was in the news today?")
+#' chat$chat("What's the biggest news in the economy?")
+#' }
+google_tool_web_search <- function() {
+  # The tool takes no options: `google_search` maps to an empty named list
+  # (presumably so it is encoded as a JSON object rather than an array).
+  no_options <- set_names(list())
+  ToolBuiltIn(name = "web_search", json = list(google_search = no_options))
+}
+
+#' Google URL fetch tool
+#'
+#' @description
+#' With this tool registered, URLs written directly into a prompt are fetched
+#' and analyzed by Gemini.
+#'
+#' Learn more in <https://ai.google.dev/gemini-api/docs/url-context>.
+#'
+#' @family built-in tools
+#' @export
+#' @examples
+#' \dontrun{
+#' chat <- chat_google_gemini()
+#' chat$register_tool(google_tool_web_fetch())
+#' chat$chat("What are the latest package releases on https://tidyverse.org/blog?")
+#' }
+google_tool_web_fetch <- function() {
+  # The tool takes no options: `url_context` maps to an empty named list
+  # (presumably so it is encoded as a JSON object rather than an array).
+  no_options <- set_names(list())
+  ToolBuiltIn(name = "web_fetch", json = list(url_context = no_options))
+}
diff --git a/R/provider-google.R b/R/provider-google.R
@@ -222,8 +222,13 @@ method(chat_body, ProviderGoogleGemini) <- function(
 
   # https://ai.google.dev/api/caching#Tool
   if (length(tools) > 0) {
+    is_builtin <- map_lgl(tools, \(tool) S7_inherits(tool, ToolBuiltIn))
     funs <- as_json(provider, unname(tools))
-    tools <- list(functionDeclarations = funs)
+
+    tools <- c(
+      compact(list(functionDeclarations = funs[!is_builtin])),
+      unlist(funs[is_builtin], recursive = FALSE)
+    )
   } else {
     tools <- NULL
   }
@@ -279,12 +284,26 @@ method(stream_merge_chunks, ProviderGoogleGemini) <- function(
 }
 
 method(value_tokens, ProviderGoogleGemini) <- function(provider, json) {
+  # Field reference: https://ai.google.dev/api/generate-content#UsageMetadata
   usage <- json$usageMetadata
+
+  # Total token count for the generation request (prompt + response candidates).
+  # Not documented, but appears to include thinking and tool use, i.e.
+  # usage$promptTokenCount + usage$candidatesTokenCount +
+  #  usage$toolUsePromptTokenCount + usage$thoughtsTokenCount ==
+  #  usage$totalTokenCount. Treated as 0 when the field is absent.
+  total <- usage$totalTokenCount %||% 0
+
+  # Number of tokens in the prompt. When cachedContent is set this still
+  # includes the cached tokens, hence `input - cached` below. NOTE(review):
+  # `total - input` is negative if only totalTokenCount is missing -- confirm.
+  input <- usage$promptTokenCount %||% 0
+
   cached <- usage$cachedContentTokenCount %||% 0
 
   tokens(
-    input = (usage$promptTokenCount %||% 0) + -cached,
-    output = usage$candidatesTokenCount + (usage$thoughtsTokenCount %||% 0),
+    input = input - cached,
+    output = total - input,
     cached_input = cached
   )
 }

diff --git a/R/provider-openai-tools.R b/R/provider-openai-tools.R
@@ -0,0 +1,49 @@
+#' OpenAI web search tool
+#'
+#' @description
+#' Lets OpenAI models look things up on the web so that answers can draw on
+#' current information. How the search runs depends on the model:
+#' non-reasoning models perform simple searches, whereas reasoning models can
+#' perform agentic, iterative searches.
+#'
+#' Learn more at <https://platform.openai.com/docs/guides/tools-web-search>.
+#'
+#' @param allowed_domains Character vector of domains that searches are
+#'   restricted to (e.g., `c("nytimes.com", "bbc.com")`). Maximum 20 domains.
+#'   A leading `http://` or `https://` is removed automatically.
+#' @param user_location List used to localize search results, with optional
+#'   elements `country` (2-letter ISO code), `city`, `region`, and `timezone`
+#'   (IANA timezone).
+#' @param external_web_access Logical. `TRUE` (the default) allows live
+#'   internet access; `FALSE` uses only cached/indexed results.
+#'
+#' @family built-in tools
+#' @export
+#' @examples
+#' \dontrun{
+#' chat <- chat_openai()
+#' chat$register_tool(openai_tool_web_search())
+#' chat$chat("Very briefly summarise the top 3 news stories of the day")
+#' chat$chat("Of those stories, which one do you think was the most interesting?")
+#' }
+openai_tool_web_search <- function(
+  allowed_domains = NULL,
+  user_location = NULL,
+  external_web_access = TRUE
+) {
+  check_character(allowed_domains, allow_null = TRUE)
+  check_bool(external_web_access)
+
+  # Only build a `filters` entry when domains were supplied; the scheme
+  # prefix is dropped from each one first.
+  domain_filter <- NULL
+  if (!is.null(allowed_domains)) {
+    bare_domains <- sub("^https?://", "", allowed_domains)
+    domain_filter <- list(allowed_domains = bare_domains)
+  }
+
+  # compact() is applied so that entries left as NULL are not sent.
+  spec <- list(
+    type = "web_search",
+    filters = domain_filter,
+    user_location = user_location,
+    external_web_access = external_web_access
+  )
+  ToolBuiltIn("web_search", json = compact(spec))
+}
diff --git a/R/provider-openai.R b/R/provider-openai.R
@@ -305,9 +305,13 @@ method(value_turn, ProviderOpenAI) <- function(
         "unknown"
       )
       ContentImageInline(mime_type, output$result)
+    } else if (output$type == "web_search_call") {
+      # https://platform.openai.com/docs/guides/tools-web-search#output-and-citations
+      ContentToolRequestSearch(query = output$action$query, json = output)
     } else {
+      browser()
       cli::cli_abort(
-        "Unknown content type {.str {content$type}}.",
+        "Unknown content type {.str {output$type}}.",
         .internal = TRUE
       )
     }