From 753a7bbbd506ae7c22c6a6e1510e2883d8be6c13 Mon Sep 17 00:00:00 2001 From: Niko Maroulis Date: Sat, 13 Jun 2026 11:57:43 -0400 Subject: [PATCH] Honor HTTP/HTTPS proxy env vars in Hub (#53) LlamaCppEx.Hub uses Req -> Finch -> Mint, which (unlike curl/wget) does not read proxy environment variables. On proxy-only networks every HuggingFace request went direct and timed out. Add proxy_request_options/2, resolving a proxy from the :proxy option (URL string, Mint {scheme, host, port, opts} tuple, or false) or the standard env vars: HTTPS_PROXY/HTTP_PROXY (and lowercase) take precedence over ALL_PROXY, with NO_PROXY/:no_proxy host bypass. Supports basic-auth userinfo (redacted in logs). Wired into search, list_gguf_files, get_model_info, and the streaming download. SOCKS proxies are detected and skipped with an actionable warning, since Mint supports HTTP/1 CONNECT proxies only; documented in a new Proxies moduledoc section with the Privoxy/gost bridge workaround. Adds 20 TDD unit tests in test/hub_proxy_test.exs. --- lib/llama_cpp_ex/hub.ex | 183 ++++++++++++++++++++++++++++++++++++++-- test/hub_proxy_test.exs | 176 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 351 insertions(+), 8 deletions(-) create mode 100644 test/hub_proxy_test.exs diff --git a/lib/llama_cpp_ex/hub.ex b/lib/llama_cpp_ex/hub.ex index db783d5..c600d9b 100644 --- a/lib/llama_cpp_ex/hub.ex +++ b/lib/llama_cpp_ex/hub.ex @@ -36,6 +36,32 @@ defmodule LlamaCppEx.Hub do ## Offline Mode Set `LLAMA_OFFLINE=1` to use only cached files without network access. + + ## Proxies + + Requests honor the standard proxy environment variables automatically: + `HTTPS_PROXY`/`HTTP_PROXY` (and their lowercase forms), falling back to + `ALL_PROXY`, with `NO_PROXY` respected for host bypass. Because HuggingFace is + served over HTTPS, the `HTTPS_PROXY` value is the one that applies; an HTTP + proxy tunnels HTTPS via the `CONNECT` method. 
+ + # honored automatically + export HTTPS_PROXY=http://127.0.0.1:8118 + + Override or disable proxying per call with the `:proxy` (a URL string, a Mint + `{scheme, address, port, opts}` tuple, or `false`) and `:no_proxy` options: + + LlamaCppEx.Hub.search("qwen3 gguf", proxy: "http://user:pass@127.0.0.1:8118") + LlamaCppEx.Hub.download("org/model", "model.gguf", proxy: false) + + ### SOCKS is not supported + + The underlying HTTP client (Req → Finch → Mint) supports HTTP/1 proxies only — + plain forwarding and HTTPS-over-`CONNECT` tunneling. It has **no SOCKS + support**, so a `socks5://` value (e.g. from `ALL_PROXY`) is ignored with a + warning. To use a SOCKS upstream, run a local HTTP-to-SOCKS bridge such as + [Privoxy](https://www.privoxy.org) or [gost](https://github.com/go-gost/gost) + and point `HTTPS_PROXY` at the bridge's HTTP port. """ require Logger @@ -58,6 +84,7 @@ defmodule LlamaCppEx.Hub do * `:sort` - Sort by `"downloads"`, `"likes"`, or `"lastModified"`. Defaults to `"downloads"`. * `:direction` - Sort direction, `-1` for descending. Defaults to `-1`. * `:token` - HuggingFace API token. + * `:proxy`, `:no_proxy` - Proxy overrides. See the "Proxies" section above. ## Examples @@ -81,7 +108,9 @@ defmodule LlamaCppEx.Hub do limit: limit ] - case Req.get(@hf_api_url, headers: headers, params: params) do + req_opts = [headers: headers, params: params] ++ proxy_request_options(@hf_api_url, opts) + + case Req.get(@hf_api_url, req_opts) do {:ok, %{status: 200, body: body}} when is_list(body) -> models = Enum.map(body, fn m -> @@ -122,6 +151,7 @@ defmodule LlamaCppEx.Hub do * `:token` - HuggingFace API token. Defaults to `HF_TOKEN` environment variable. * `:revision` - Git revision (branch, tag, or commit). Defaults to `"main"`. * `:force` - Force re-download even if cached. Defaults to `false`. + * `:proxy`, `:no_proxy` - Proxy overrides. See the "Proxies" section above. 
""" @spec download(String.t(), String.t(), keyword()) :: {:ok, String.t()} | {:error, String.t()} @@ -145,7 +175,7 @@ defmodule LlamaCppEx.Hub do true -> url = build_download_url(repo_id, filename, opts) headers = auth_headers(opts) - do_download_to(url, dest, headers) + do_download_to(url, dest, headers, proxy_request_options(url, opts)) end end end @@ -178,7 +208,7 @@ defmodule LlamaCppEx.Hub do url = "#{@hf_api_url}/#{repo_id}/tree/#{revision}" headers = auth_headers(opts) - case Req.get(url, headers: headers) do + case Req.get(url, [headers: headers] ++ proxy_request_options(url, opts)) do {:ok, %{status: 200, body: body}} when is_list(body) -> files = body @@ -225,7 +255,7 @@ defmodule LlamaCppEx.Hub do url = "#{@hf_api_url}/#{repo_id}" headers = auth_headers(opts) - case Req.get(url, headers: headers) do + case Req.get(url, [headers: headers] ++ proxy_request_options(url, opts)) do {:ok, %{status: 200, body: body}} -> {:ok, body} @@ -294,6 +324,31 @@ defmodule LlamaCppEx.Hub do end end + @doc false + # Builds the Req `connect_options` needed to route a request through a proxy. + # + # The proxy is resolved from the `:proxy` option (a URL string, a Mint + # `{scheme, address, port, opts}` tuple, or `false` to disable) and otherwise + # from the standard proxy environment variables. Returns `[]` when no usable + # proxy applies. SOCKS proxies are detected and skipped — see the "Proxies" + # section in the module doc for the reasoning and the workaround. 
+  @spec proxy_request_options(String.t(), keyword()) :: keyword()
+  def proxy_request_options(url, opts \\ []) do
+    case Keyword.get(opts, :proxy, :auto) do
+      {scheme, host, port, proxy_opts}
+      when is_atom(scheme) and is_binary(host) and is_integer(port) and is_list(proxy_opts) ->
+        [connect_options: [proxy: {scheme, host, port, proxy_opts}]]
+
+      proxy_setting ->
+        target = URI.parse(url)
+
+        case resolve_proxy_url(proxy_setting, target.scheme) do
+          nil -> []
+          proxy_url -> build_proxy_options(proxy_url, target.host, opts)
+        end
+    end
+  end
+
   @doc """
   Build the local cache path for a model file.
   """
@@ -323,14 +378,124 @@ defmodule LlamaCppEx.Hub do
     end
   end
 
-  defp do_download_to(url, dest, headers) do
+  # --- Proxy resolution ---
+  # `false` disables proxying, a string is used as-is, `:auto`/`nil` defer to the environment.
+  defp resolve_proxy_url(false, _scheme), do: nil
+  defp resolve_proxy_url(url, _scheme) when is_binary(url), do: url
+  defp resolve_proxy_url(auto, scheme) when auto in [:auto, nil], do: env_proxy_url(scheme)
+
+  # Scheme-specific vars take precedence over the catch-all ALL_PROXY, so a
+  # usable HTTP proxy always wins over an (unusable) SOCKS ALL_PROXY.
+  defp env_proxy_url("https"), do: env_any(["HTTPS_PROXY", "https_proxy"]) || env_all_proxy()
+  defp env_proxy_url("http"), do: env_any(["HTTP_PROXY", "http_proxy"]) || env_all_proxy()
+  defp env_proxy_url(_other), do: env_all_proxy()
+
+  defp env_all_proxy, do: env_any(["ALL_PROXY", "all_proxy"])
+
+  defp env_any(keys) do
+    Enum.find_value(keys, fn key ->
+      case System.get_env(key) do
+        nil -> nil
+        "" -> nil
+        value -> value
+      end
+    end)
+  end
+
+  defp build_proxy_options(proxy_url, host, opts) do
+    no_proxy = Keyword.get(opts, :no_proxy) || env_any(["NO_PROXY", "no_proxy"]) || ""
+
+    if bypass_proxy?(host, no_proxy) do
+      []
+    else
+      case parse_proxy(proxy_url) do
+        {:ok, proxy, []} ->
+          [connect_options: [proxy: proxy]]
+
+        {:ok, proxy, proxy_headers} ->
+          [connect_options: [proxy: proxy, proxy_headers: proxy_headers]]
+
+        {:error, {:socks, scheme}} ->
+          Logger.warning(
+            "ignoring #{scheme} proxy #{redact_proxy(proxy_url)}: the HTTP client (Req/Finch/Mint) " <>
+              "supports HTTP/HTTPS CONNECT proxies only, not SOCKS. Run a local HTTP-to-SOCKS " <>
+              "bridge (e.g. Privoxy or gost) and point HTTPS_PROXY at it instead."
+          )
+
+          []
+
+        {:error, :invalid} ->
+          Logger.warning("ignoring malformed proxy URL #{redact_proxy(proxy_url)}")
+          []
+      end
+    end
+  end
+
+  defp bypass_proxy?(nil, _no_proxy), do: false
+
+  defp bypass_proxy?(host, no_proxy) do
+    no_proxy
+    |> String.split(",", trim: true)
+    |> Enum.map(&String.trim/1)
+    |> Enum.any?(&host_matches_no_proxy?(host, &1))
+  end
+
+  defp host_matches_no_proxy?(_host, ""), do: false
+  defp host_matches_no_proxy?(_host, "*"), do: true
+  # Host names are case-insensitive, so both sides are lowercased before comparing.
+  defp host_matches_no_proxy?(host, entry) do
+    {host, entry} = {String.downcase(host), String.downcase(String.trim_leading(entry, "."))}
+    host == entry or String.ends_with?(host, "." <> entry)
+  end
+
+  defp parse_proxy(proxy_url) do
+    uri = proxy_url |> normalize_proxy_url() |> URI.parse()
+    # A URL with no host (e.g. "http://") is reported as malformed, not handed to the client.
+    case uri.scheme do
+      scheme when scheme in ["http", "https"] and uri.host not in [nil, ""] ->
+        proxy = {proxy_scheme_atom(scheme), uri.host, uri.port || default_proxy_port(scheme), []}
+        {:ok, proxy, proxy_auth_headers(uri.userinfo)}
+
+      "socks" <> _ ->
+        {:error, {:socks, String.upcase(uri.scheme)}}
+
+      _other ->
+        {:error, :invalid}
+    end
+  end
+
+  # Proxy URLs from `ALL_PROXY` or a bare `host:port` option may omit the scheme.
+  defp normalize_proxy_url(url) do
+    if Regex.match?(~r{^[a-zA-Z][a-zA-Z0-9+.\-]*://}, url), do: url, else: "http://" <> url
+  end
+
+  defp proxy_scheme_atom("http"), do: :http
+  defp proxy_scheme_atom("https"), do: :https
+
+  defp default_proxy_port("http"), do: 80
+  defp default_proxy_port("https"), do: 443
+
+  defp proxy_auth_headers(nil), do: []
+  # URI userinfo is percent-encoded, so decode it before building the Basic credential.
+  defp proxy_auth_headers(userinfo),
+    do: [{"proxy-authorization", "Basic " <> Base.encode64(URI.decode(userinfo))}]
+
+  # Strips credentials before a proxy URL is written to the log.
+  defp redact_proxy(proxy_url) do
+    case URI.parse(normalize_proxy_url(proxy_url)) do
+      %URI{userinfo: nil} = uri -> URI.to_string(uri)
+      %URI{} = uri -> URI.to_string(%{uri | userinfo: "***"})
+    end
+  end
+
+  defp do_download_to(url, dest, headers, proxy_opts) do
     Logger.info("Downloading to #{dest}")
     File.mkdir_p!(Path.dirname(dest))
 
     tmp_dest = dest <> ".download"
 
     try do
-      case do_stream_download(url, tmp_dest, headers) do
+      case do_stream_download(url, tmp_dest, headers, proxy_opts) do
         {:ok, etag} ->
           File.rename!(tmp_dest, dest)
 
@@ -352,9 +517,11 @@ defmodule LlamaCppEx.Hub do
     end
   end
 
-  defp do_stream_download(url, dest, headers) do
+  defp do_stream_download(url, dest, headers, proxy_opts) do
     # Use Req with output to file — handles redirects correctly
-    case Req.get(url, headers: headers, max_redirects: 10, into: File.stream!(dest)) do
+    req_opts = [headers: headers, max_redirects: 10, into: File.stream!(dest)] ++ proxy_opts
+
+    case Req.get(url, req_opts) do
       {:ok, %{status: 200} = resp} ->
         etag = get_header(resp, "etag")
         {:ok, etag}
diff --git a/test/hub_proxy_test.exs b/test/hub_proxy_test.exs
new file mode 100644
index 0000000..9ec7315
--- /dev/null
+++ b/test/hub_proxy_test.exs
@@ -0,0 +1,176 @@
+defmodule LlamaCppEx.HubProxyTest do
+  use ExUnit.Case, async: false
+  import ExUnit.CaptureLog
+
+  alias LlamaCppEx.Hub
+
+  @https "https://huggingface.co/api/models"
+  @http "http://example.com/models"
+
+  describe "proxy_request_options/2 with explicit :proxy option" do
+    test "http proxy on an https target tunnels via CONNECT (proxy tuple keeps the proxy's own scheme)" do
+      assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+               Hub.proxy_request_options(@https, proxy: "http://127.0.0.1:8118", no_proxy: "")
+    end
+
+    test "http proxy on an http target" do
+      assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+               Hub.proxy_request_options(@http, proxy: "http://127.0.0.1:8118", no_proxy: "")
+    end
+
+    test "an https proxy scheme is preserved" do
+      assert [connect_options: [proxy: {:https, "proxy.local", 8443, []}]] =
+               Hub.proxy_request_options(@https, proxy: "https://proxy.local:8443", no_proxy: "")
+    end
+
+    test "a scheme-less proxy URL defaults to http" do
+      assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+               Hub.proxy_request_options(@https, proxy: "127.0.0.1:8118", no_proxy: "")
+    end
+
+    test "defaults to port 80 for an http proxy without an explicit port" do
+      assert [connect_options: [proxy: {:http, "proxy.local", 80, []}]] =
+               Hub.proxy_request_options(@https, proxy: "http://proxy.local", no_proxy: "")
+    end
+
+    test "defaults to port 443 for an https proxy without an explicit port" do
+      assert [connect_options: [proxy: {:https, "proxy.local", 443, []}]] =
+               Hub.proxy_request_options(@https, proxy: "https://proxy.local", no_proxy: "")
+    end
+
+    test "userinfo becomes a basic proxy-authorization header and is stripped from the host" do
+      assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}, proxy_headers: headers]] =
+               Hub.proxy_request_options(@https,
+                 proxy: "http://user:pass@127.0.0.1:8118",
+                 no_proxy: ""
+               )
+
+      assert {"proxy-authorization", "Basic " <> encoded} =
+               List.keyfind(headers, "proxy-authorization", 0)
+
+      assert Base.decode64!(encoded) == "user:pass"
+    end
+
+    test "proxy: false disables proxying entirely" do
+      assert [] = Hub.proxy_request_options(@https, proxy: false)
+    end
+
+    test "a Mint proxy tuple is passed through unchanged" do
+      assert [connect_options: [proxy: {:http, "h", 3128, []}]] =
+               Hub.proxy_request_options(@https, proxy: {:http, "h", 3128, []})
+    end
+  end
+
+  describe "proxy_request_options/2 SOCKS handling (Mint has no SOCKS support)" do
+    test "a socks5 proxy is ignored with an actionable warning" do
+      log =
+        capture_log(fn ->
+          assert [] =
+                   Hub.proxy_request_options(@https,
+                     proxy: "socks5://127.0.0.1:1080",
+                     no_proxy: ""
+                   )
+        end)
+
+      assert log =~ "SOCKS"
+    end
+
+    test "a socks5h proxy is also ignored" do
+      assert [] =
+               Hub.proxy_request_options(@https, proxy: "socks5h://127.0.0.1:1080", no_proxy: "")
+    end
+  end
+
+  describe "proxy_request_options/2 NO_PROXY handling" do
+    test "an exact host match bypasses the proxy" do
+      assert [] =
+               Hub.proxy_request_options("https://internal.corp/x",
+                 proxy: "http://127.0.0.1:8118",
+                 no_proxy: "internal.corp"
+               )
+    end
+
+    test "a domain suffix match bypasses the proxy" do
+      assert [] =
+               Hub.proxy_request_options("https://api.internal.corp/x",
+                 proxy: "http://127.0.0.1:8118",
+                 no_proxy: ".internal.corp"
+               )
+    end
+
+    test "a wildcard bypasses all hosts" do
+      assert [] =
+               Hub.proxy_request_options(@https, proxy: "http://127.0.0.1:8118", no_proxy: "*")
+    end
+
+    test "a non-matching no_proxy entry still uses the proxy" do
+      assert [connect_options: [proxy: {:http, _, _, _}]] =
+               Hub.proxy_request_options(@https,
+                 proxy: "http://127.0.0.1:8118",
+                 no_proxy: "internal.corp"
+               )
+    end
+  end
+
+  describe "proxy_request_options/2 environment auto-detection" do
+    test "HTTPS_PROXY is used for https targets" do
+      with_env(%{"HTTPS_PROXY" => "http://127.0.0.1:8118"}, fn ->
+        assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+                 Hub.proxy_request_options(@https)
+      end)
+    end
+
+    test "HTTP_PROXY is used for http targets" do
+      with_env(%{"HTTP_PROXY" => "http://127.0.0.1:8118"}, fn ->
+        assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+                 Hub.proxy_request_options(@http)
+      end)
+    end
+
+    test "the scheme-specific HTTPS_PROXY wins over a (SOCKS) ALL_PROXY" do
+      with_env(
+        %{"HTTPS_PROXY" => "http://127.0.0.1:8118", "ALL_PROXY" => "socks5://127.0.0.1:1080"},
+        fn ->
+          assert [connect_options: [proxy: {:http, "127.0.0.1", 8118, []}]] =
+                   Hub.proxy_request_options(@https)
+        end
+      )
+    end
+
+    test "ALL_PROXY is used as a fallback when no scheme-specific proxy is set" do
+      with_env(%{"ALL_PROXY" => "http://127.0.0.1:3128"}, fn ->
+        assert [connect_options: [proxy: {:http, "127.0.0.1", 3128, []}]] =
+                 Hub.proxy_request_options(@https)
+      end)
+    end
+
+    test "no proxy configured returns an empty option list" do
+      with_env(%{}, fn ->
+        assert [] = Hub.proxy_request_options(@https)
+      end)
+    end
+  end
+
+  # Clears every proxy-related env var, applies the given overrides, runs `fun`, then restores
+  # the original environment, so results do not depend on the developer's ambient shell. The OS
+  # environment is global to the VM, which is why this module must not run with `async: true`.
+  defp with_env(env, fun) do
+    keys =
+      ~w(HTTP_PROXY HTTPS_PROXY ALL_PROXY NO_PROXY http_proxy https_proxy all_proxy no_proxy)
+
+    saved = Map.new(keys, fn k -> {k, System.get_env(k)} end)
+
+    try do
+      Enum.each(keys, &System.delete_env/1)
+      Enum.each(env, fn {k, v} -> System.put_env(k, v) end)
+      fun.()
+    after
+      Enum.each(keys, fn k ->
+        case saved[k] do
+          nil -> System.delete_env(k)
+          v -> System.put_env(k, v)
+        end
+      end)
+    end
+  end
+end