diff --git a/.gitignore b/.gitignore index d4f84e5d1..6aee57ace 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ shlib-exports-*.txt /test/test_khash /test/test_kstring /test/test_mod +/test/test_hfile_libcurl /test/test_nibbles /test/test-parse-reg /test/test_realn diff --git a/Makefile b/Makefile index b26051106..a6617b8a2 100644 --- a/Makefile +++ b/Makefile @@ -102,7 +102,8 @@ BUILT_TEST_PROGRAMS = \ test/test-bcf-translate \ test/test-parse-reg \ test/test_introspection \ - test/test-bcf_set_variant_type + test/test-bcf_set_variant_type \ + test/test_hfile_libcurl BUILT_THRASH_PROGRAMS = \ test/thrash_threads1 \ @@ -713,6 +714,7 @@ check test: all $(HTSCODECS_TEST_TARGETS) else \ REF_PATH=: ./test.pl $(REF_CACHE_TEST_OPTS) $${TEST_OPTS:-} ; \ fi + test/test_hfile_libcurl test/hts_endian: test/hts_endian.o $(CC) $(LDFLAGS) -o $@ test/hts_endian.o $(LIBS) @@ -805,6 +807,9 @@ test/test_introspection: test/test_introspection.o libhts.a test/test-bcf_set_variant_type: test/test-bcf_set_variant_type.o libhts.a $(CC) $(LDFLAGS) -o $@ test/test-bcf_set_variant_type.o libhts.a $(LIBS) -lpthread +test/test_hfile_libcurl: test/test_hfile_libcurl.o libhts.a + $(CC) $(LDFLAGS) -o $@ test/test_hfile_libcurl.o libhts.a $(LIBS) -lpthread + # Extra tests for bundled htscodecs test_htscodecs_rans4x8: htscodecs/tests/rans4x8 cd htscodecs/tests && srcdir=. && export srcdir && ./rans4x8.test @@ -884,6 +889,7 @@ test/test-bcf-sr.o: test/test-bcf-sr.c config.h $(htslib_hts_defs_h) $(htslib_sy test/test-bcf-translate.o: test/test-bcf-translate.c config.h $(htslib_vcf_h) test/test_introspection.o: test/test_introspection.c config.h $(htslib_hts_h) $(htslib_hfile_h) test/test-bcf_set_variant_type.o: test/test-bcf_set_variant_type.c config.h $(htslib_hts_h) vcf.c +test/test_hfile_libcurl.o: test/test_hfile_libcurl.c config.h $(htslib_hfile_h) $(htslib_hts_h) # Standalone target not added to $(BUILT_TEST_PROGRAMS) as some may not # have a compiler that compiles as C++ when given a .cpp source file. @@ -1015,6 +1021,7 @@ htslib-uninstalled.pc: htslib.pc.tmp testclean: + -rm -f test/hfile_libcurl.tmp -rm -f test/*.tmp test/*.tmp.* test/faidx/*.tmp* \ test/longrefs/*.tmp.* test/ref_cache/*.tmp.* test/tabix/*.tmp.* \ test/bgzf_boundaries/*.tmp.* test/*/FAIL* \ diff --git a/hfile_libcurl.c b/hfile_libcurl.c index a18fdb571..0ca15d0e9 100644 --- a/hfile_libcurl.c +++ b/hfile_libcurl.c @@ -35,6 +35,7 @@ DEALINGS IN THE SOFTWARE. */ # include #endif #include +#include #include "hfile_internal.h" #ifdef ENABLE_PLUGINS @@ -111,11 +112,13 @@ typedef struct { unsigned can_seek : 1; // Can (attempt to) seek on this handle unsigned is_recursive:1; // Opened by hfile_libcurl itself unsigned tried_seek : 1; // At least one seek has been attempted + unsigned needs_reconnect : 1; // Deferred reconnect after retryable error int nrunning; http_headers headers; off_t delayed_seek; // Location to seek to before reading off_t last_offset; // Location we're seeking from + off_t stream_pos; // Current position in remote file for retry char *preserved; // Preserved buffer content on seek size_t preserved_bytes; // Number of preserved bytes size_t preserved_size; // Size of preserved buffer @@ -227,6 +230,43 @@ static int easy_errno(CURL *easy, CURLcode err) } } +static int is_retryable(CURL *easy, CURLcode err) +{ + switch (err) { + case CURLE_COULDNT_CONNECT: + case CURLE_SEND_ERROR: + case CURLE_RECV_ERROR: + case CURLE_PARTIAL_FILE: + case CURLE_OPERATION_TIMEDOUT: + case CURLE_GOT_NOTHING: + case CURLE_SSL_CONNECT_ERROR: +#ifdef CURLE_HTTP2 + case CURLE_HTTP2: +#endif +#ifdef CURLE_HTTP2_STREAM + case CURLE_HTTP2_STREAM: +#endif + return 1; + + case CURLE_HTTP_RETURNED_ERROR: { + long response = 0; + if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response) + == CURLE_OK) { + switch (response) { + case 429: case 500: case 502: case 503: case 504: + return 1; + default: + break; + } + } + return 0; + } + + default: + return 0; + } +} + static int multi_errno(CURLMcode errm) { switch (errm) { @@ -255,10 +295,16 @@ static struct { char *auth_path; khash_t(auth_map) *auth_map; int allow_unencrypted_auth_header; + int retry_max; // Max retry attempts (HTS_RETRY_MAX, default 3) + long retry_delay_ms; // Initial retry delay in ms (HTS_RETRY_DELAY, default 500) + long retry_max_delay_ms; // Max retry delay in ms (HTS_RETRY_MAX_DELAY, default 60000) + long low_speed_limit; // Bytes/sec threshold (HTS_LOW_SPEED_LIMIT, default 1) + long low_speed_time; // Seconds below threshold (HTS_LOW_SPEED_TIME, default 60) pthread_mutex_t auth_lock; pthread_mutex_t share_lock; -} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER, - PTHREAD_MUTEX_INITIALIZER }; +} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, + 3, 500, 60000, 1, 60, + PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER }; static void share_lock(CURL *handle, curl_lock_data data, curl_lock_access access, void *userptr) { @@ -772,6 +818,61 @@ static size_t header_callback(void *contents, size_t size, size_t nmemb, } +static void refresh_retry_config(void) +{ + const char *val; + if ((val = getenv("HTS_RETRY_MAX")) != NULL) + curl.retry_max = atoi(val); + if ((val = getenv("HTS_RETRY_DELAY")) != NULL) + curl.retry_delay_ms = atol(val); + if ((val = getenv("HTS_RETRY_MAX_DELAY")) != NULL) + curl.retry_max_delay_ms = atol(val); + if ((val = getenv("HTS_LOW_SPEED_LIMIT")) != NULL) + curl.low_speed_limit = atol(val); + if ((val = getenv("HTS_LOW_SPEED_TIME")) != NULL) + curl.low_speed_time = atol(val); +} + +static void retry_sleep(long delay_ms) +{ +#ifdef _WIN32 + Sleep(delay_ms); +#else + struct timespec ts; + ts.tv_sec = delay_ms / 1000; + ts.tv_nsec = (delay_ms % 1000) * 1000000L; + nanosleep(&ts, NULL); +#endif +} + +static int retry_reconnect(hFILE_libcurl *fp, off_t pos) +{ + int attempt; + long delay = curl.retry_delay_ms; + int save_can_seek; + + for (attempt = 0; attempt < curl.retry_max; attempt++) { + hts_log_warning("Retrying connection (attempt %d/%d) at offset %lld", + attempt + 1, curl.retry_max, (long long) pos); + retry_sleep(delay); + + save_can_seek = fp->can_seek; + if (restart_from_position(fp, pos) == 0) { + fp->needs_reconnect = 0; + return 0; + } + // restart_from_position sets can_seek=0 on failure; restore it + fp->can_seek = save_can_seek; + + // Exponential backoff + delay *= 2; + if (delay > curl.retry_max_delay_ms) + delay = curl.retry_max_delay_ms; + } + + return -1; +} + static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) { hFILE_libcurl *fp = (hFILE_libcurl *) fpv; @@ -779,7 +880,17 @@ static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) off_t to_skip = -1; ssize_t got = 0; CURLcode err; + int retry_attempts = 0; + + // Handle deferred reconnection from a previous retryable error + if (fp->needs_reconnect) { + if (retry_reconnect(fp, fp->stream_pos) < 0) { + errno = EIO; + return -1; + } + } +retry: if (fp->delayed_seek >= 0) { assert(fp->base.offset == fp->delayed_seek); @@ -799,6 +910,7 @@ static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) } else { fp->last_offset = fp->delayed_seek = -1; } + fp->stream_pos += bytes; return bytes; } @@ -852,10 +964,42 @@ static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) fp->buffer.len = 0; if (fp->finished && fp->final_result != CURLE_OK) { + if (is_retryable(fp->easy, fp->final_result) + && curl.retry_max > 0) { + if (got > 0) { + // Return partial data; defer reconnection to next call + fp->needs_reconnect = 1; + fp->stream_pos += got; + return got; + } + // No data; retry inline + if (retry_attempts < curl.retry_max) { + long delay = curl.retry_delay_ms; + int save_can_seek = fp->can_seek; + int i; + for (i = 0; i < retry_attempts; i++) { + delay *= 2; + if (delay > curl.retry_max_delay_ms) { + delay = curl.retry_max_delay_ms; + break; + } + } + hts_log_warning("Retrying read (attempt %d/%d) at offset %lld", + retry_attempts + 1, curl.retry_max, + (long long) fp->stream_pos); + retry_sleep(delay); + if (restart_from_position(fp, fp->stream_pos) == 0) { + retry_attempts++; + goto retry; + } + fp->can_seek = save_can_seek; + } + } errno = easy_errno(fp->easy, fp->final_result); return -1; } + fp->stream_pos += got; return got; } @@ -971,6 +1115,7 @@ static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence) preserve_buffer_content(fp); } fp->delayed_seek = pos; + fp->stream_pos = pos; return pos; } @@ -982,6 +1127,7 @@ static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence) } fp->tried_seek = 1; + fp->stream_pos = pos; return pos; } @@ -1177,6 +1323,8 @@ libcurl_open(const char *url, const char *modes, http_headers *headers) kstring_t in_header = {0, 0, NULL}; long response; + refresh_retry_config(); + is_recursive = strchr(modes, 'R') != NULL; if ((s = strpbrk(modes, "rwa+")) != NULL) { @@ -1204,7 +1352,9 @@ libcurl_open(const char *url, const char *modes, http_headers *headers) fp->paused = fp->closing = fp->finished = fp->perform_again = 0; fp->can_seek = 1; fp->tried_seek = 0; + fp->needs_reconnect = 0; fp->delayed_seek = fp->last_offset = -1; + fp->stream_pos = 0; fp->preserved = NULL; fp->preserved_bytes = fp->preserved_size = 0; fp->is_recursive = is_recursive; @@ -1249,6 +1399,12 @@ libcurl_open(const char *url, const char *modes, http_headers *headers) } } err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s); + if (curl.low_speed_limit > 0 && curl.low_speed_time > 0) { + err |= curl_easy_setopt(fp->easy, CURLOPT_LOW_SPEED_LIMIT, + curl.low_speed_limit); + err |= curl_easy_setopt(fp->easy, CURLOPT_LOW_SPEED_TIME, + curl.low_speed_time); + } if (fp->headers.callback) { if (add_callback_headers(fp) != 0) goto error; } @@ -1285,10 +1441,32 @@ libcurl_open(const char *url, const char *modes, http_headers *headers) } if (fp->finished && fp->final_result != CURLE_OK) { + if (is_retryable(fp->easy, fp->final_result) + && curl.retry_max > 0) { + long delay = curl.retry_delay_ms; + int attempt, save_can_seek; + for (attempt = 0; attempt < curl.retry_max; attempt++) { + hts_log_warning("Retrying open (attempt %d/%d)", + attempt + 1, curl.retry_max); + retry_sleep(delay); + save_can_seek = fp->can_seek; + if (restart_from_position(fp, 0) == 0) { + if (!fp->finished || fp->final_result == CURLE_OK) + goto open_ok; + if (!is_retryable(fp->easy, fp->final_result)) + break; + } + fp->can_seek = save_can_seek; + delay *= 2; + if (delay > curl.retry_max_delay_ms) + delay = curl.retry_max_delay_ms; + } + } errno = easy_errno(fp->easy, fp->final_result); goto error_remove; } +open_ok: if (fp->headers.redirect) { if (response >= 300 && response < 400) { // redirection kstring_t new_url = {0, 0, NULL}; @@ -1552,6 +1730,20 @@ int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self) curl.allow_unencrypted_auth_header = 1; } + { + const char *val; + if ((val = getenv("HTS_RETRY_MAX")) != NULL) + curl.retry_max = atoi(val); + if ((val = getenv("HTS_RETRY_DELAY")) != NULL) + curl.retry_delay_ms = atol(val); + if ((val = getenv("HTS_RETRY_MAX_DELAY")) != NULL) + curl.retry_max_delay_ms = atol(val); + if ((val = getenv("HTS_LOW_SPEED_LIMIT")) != NULL) + curl.low_speed_limit = atol(val); + if ((val = getenv("HTS_LOW_SPEED_TIME")) != NULL) + curl.low_speed_time = atol(val); + } + info = curl_version_info(CURLVERSION_NOW); ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); diff --git a/synced_bcf_reader.c b/synced_bcf_reader.c index 8f0bba5ee..7769132fa 100644 --- a/synced_bcf_reader.c +++ b/synced_bcf_reader.c @@ -465,7 +465,8 @@ static void bcf_sr_destroy1(bcf_sr_t *reader, int closefile) if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx); bcf_hdr_destroy(reader->header); if (closefile) { - hts_close(reader->file); + if (hts_close(reader->file) < 0) + hts_log_error("Error on closing %s", reader->fname); } if ( reader->itr ) tbx_itr_destroy(reader->itr); int j; diff --git a/test/mock_http_server.py b/test/mock_http_server.py new file mode 100755 index 000000000..1f926c4f9 --- /dev/null +++ b/test/mock_http_server.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +# Copyright (C) 2026 Broad Institute. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +"""Mock HTTP server for testing hfile_libcurl retry logic. + +Usage: + python test/mock_http_server.py --mode --file [--fail-count N] [--port 0] + +Prints the allocated port number to stdout on startup, then serves requests. + +Modes: + normal - Serve file normally with Content-Length and Range support + 503_then_ok - Return 503 for first N requests, then serve normally + 429_then_ok - Return 429 for first N requests, then serve normally + drop_mid_transfer - Send first half of file then close connection, N times + 404 - Always return 404 + stall - Send headers + a few bytes then sleep forever +""" + +import argparse +import os +import signal +import sys +import threading +import time +from http.server import HTTPServer, BaseHTTPRequestHandler + + +class MockHandler(BaseHTTPRequestHandler): + protocol_version = "HTTP/1.1" # Use HTTP/1.1 for proper keep-alive + file_data = b"" + mode = "normal" + fail_count = 2 + request_count = 0 + lock = threading.Lock() + + def log_message(self, format, *args): + # Suppress request logging to avoid polluting test output + pass + + def do_GET(self): + with MockHandler.lock: + MockHandler.request_count += 1 + req_num = MockHandler.request_count + + mode = MockHandler.mode + data = MockHandler.file_data + fail_count = MockHandler.fail_count + + # Parse Range header + range_start = 0 + range_header = self.headers.get("Range") + if range_header and range_header.startswith("bytes="): + range_spec = range_header[6:] + if range_spec.endswith("-"): + range_start = int(range_spec[:-1]) + elif "-" in range_spec: + parts = range_spec.split("-") + range_start = int(parts[0]) + + if mode == "404": + self.send_response(404) + self.send_header("Content-Length", "0") + self.end_headers() + return + + if mode == "503_then_ok": + if req_num <= fail_count: + self.send_response(503) + self.send_header("Content-Length", "0") + self.end_headers() + return + + if mode == "429_then_ok": + if req_num <= fail_count: + self.send_response(429) + self.send_header("Content-Length", "0") + self.end_headers() + return + + if mode == "drop_mid_transfer": + if req_num <= fail_count: + # Send headers indicating full length, but only send half + remaining = data[range_start:] + half = len(remaining) // 2 + if half < 1: + half = 1 + if range_start > 0: + self.send_response(206) + self.send_header("Content-Range", + "bytes %d-%d/%d" % (range_start, + len(data) - 1, + len(data))) + self.send_header("Content-Length", str(len(remaining))) + else: + self.send_response(200) + self.send_header("Content-Length", str(len(data))) + self.send_header("Accept-Ranges", "bytes") + self.send_header("Connection", "close") + self.end_headers() + try: + self.wfile.write(remaining[:half]) + self.wfile.flush() + except BrokenPipeError: + pass + # Force-close the socket to simulate a drop + import socket + try: + self.connection.setsockopt( + socket.SOL_SOCKET, socket.SO_LINGER, + bytes([1, 0, 0, 0, 0, 0, 0, 0])) + self.connection.shutdown(socket.SHUT_RDWR) + except Exception: + pass + try: + self.connection.close() + except Exception: + pass + return + + if mode == "stall": + # Send headers and a few bytes, then sleep forever + self.send_response(200) + self.send_header("Content-Length", str(len(data))) + self.send_header("Accept-Ranges", "bytes") + self.end_headers() + try: + self.wfile.write(data[:min(10, len(data))]) + self.wfile.flush() + # Sleep until killed + while True: + time.sleep(3600) + except (BrokenPipeError, ConnectionResetError): + pass + return + + # Normal serving (with Range support) + remaining = data[range_start:] + if range_start > 0: + self.send_response(206) + self.send_header("Content-Range", + "bytes %d-%d/%d" % (range_start, + len(data) - 1, + len(data))) + self.send_header("Content-Length", str(len(remaining))) + else: + self.send_response(200) + self.send_header("Content-Length", str(len(data))) + self.send_header("Accept-Ranges", "bytes") + self.end_headers() + self.wfile.write(remaining) + + +def main(): + parser = argparse.ArgumentParser(description="Mock HTTP server for testing") + parser.add_argument("--mode", required=True, + choices=["normal", "503_then_ok", "429_then_ok", + "drop_mid_transfer", "404", "stall"]) + parser.add_argument("--file", required=True, help="File to serve") + parser.add_argument("--fail-count", type=int, default=2, + help="Number of requests to fail before succeeding") + parser.add_argument("--port", type=int, default=0, + help="Port to listen on (0 for auto)") + args = parser.parse_args() + + with open(args.file, "rb") as f: + MockHandler.file_data = f.read() + + MockHandler.mode = args.mode + MockHandler.fail_count = args.fail_count + + server = HTTPServer(("127.0.0.1", args.port), MockHandler) + port = server.server_address[1] + + # Print port to stdout so the test program can read it + print(port, flush=True) + + # Handle SIGTERM gracefully + def handle_sigterm(signum, frame): + server.shutdown() + sys.exit(0) + + signal.signal(signal.SIGTERM, handle_sigterm) + + server.serve_forever() + + +if __name__ == "__main__": + main() diff --git a/test/test_hfile_libcurl.c b/test/test_hfile_libcurl.c new file mode 100644 index 000000000..b80134c51 --- /dev/null +++ b/test/test_hfile_libcurl.c @@ -0,0 +1,513 @@ +/* test/test_hfile_libcurl.c -- Test cases for libcurl retry/resilience. + + Copyright (C) 2026 Broad Institute. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include + +#include +#include + +// This test requires fork/pipe/kill which are POSIX-only. +#ifdef _WIN32 +int main(void) { + fprintf(stderr, "libcurl retry tests not supported on Windows, skipping\n"); + return 0; +} +#else + +#include +#include +#include +#include +#include + +#include "../htslib/hfile.h" +#include "../htslib/hts.h" +#include "../hts_internal.h" + +#ifndef EPROTONOSUPPORT +#define EPROTONOSUPPORT ENOSYS +#endif + +// Test data file to serve +#define TEST_DATA_FILE "hfile_libcurl.tmp" +#define TEST_DATA_SIZE 16384 + +static int failures = 0; + +#define PASS(name) fprintf(stderr, " PASS: %s\n", (name)) +#define FAIL(name, ...) do { \ + fprintf(stderr, " FAIL: %s: ", (name)); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + failures++; \ +} while (0) + +// Start mock server, return its PID and port +static pid_t start_server(const char *mode, int fail_count, int *port_out) +{ + int pipefd[2]; + pid_t pid; + char fail_count_str[32]; + char port_buf[32]; + ssize_t n; + int i; + + if (pipe(pipefd) < 0) { + perror("pipe"); + return -1; + } + + snprintf(fail_count_str, sizeof(fail_count_str), "%d", fail_count); + + pid = fork(); + if (pid < 0) { + perror("fork"); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + if (pid == 0) { + // Child: redirect stdout to pipe, exec python server + close(pipefd[0]); + dup2(pipefd[1], STDOUT_FILENO); + close(pipefd[1]); + execlp("python3", "python3", "test/mock_http_server.py", + "--mode", mode, + "--file", "test/" TEST_DATA_FILE, + "--fail-count", fail_count_str, + "--port", "0", + NULL); + perror("execlp python3"); + _exit(127); + } + + // Parent: read port from pipe + close(pipefd[1]); + + // Read with timeout (give server up to 5 seconds to start) + n = 0; + for (i = 0; i < 50 && n == 0; i++) { + n = read(pipefd[0], port_buf, sizeof(port_buf) - 1); + if (n <= 0) { + hts_usleep(100000); // 100ms + n = 0; + } + } + close(pipefd[0]); + + if (n <= 0) { + fprintf(stderr, "Failed to read port from mock server\n"); + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + return -1; + } + + port_buf[n] = '\0'; + *port_out = atoi(port_buf); + if (*port_out <= 0) { + fprintf(stderr, "Invalid port from mock server: %s\n", port_buf); + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + return -1; + } + + // Give the server a moment to be ready for connections + hts_usleep(100000); + return pid; +} + +static void stop_server(pid_t pid) +{ + int status; + pid_t ret; + + if (pid <= 0) + return; + + kill(pid, SIGKILL); + ret = waitpid(pid, &status, 0); + if (ret < 0) + perror("waitpid"); +} + +// Generate deterministic test data +static void generate_test_data(void) +{ + FILE *f; + int i; + char path[256]; + + snprintf(path, sizeof(path), "test/%s", TEST_DATA_FILE); + f = fopen(path, "wb"); + if (!f) { + perror("fopen test data"); + exit(EXIT_FAILURE); + } + for (i = 0; i < TEST_DATA_SIZE; i++) { + fputc((i * 7 + 13) & 0xFF, f); + } + fclose(f); +} + +// Read expected data from test file +static unsigned char *read_expected_data(size_t *size_out) +{ + FILE *f; + unsigned char *data; + char path[256]; + + snprintf(path, sizeof(path), "test/%s", TEST_DATA_FILE); + f = fopen(path, "rb"); + if (!f) { + perror("fopen expected data"); + return NULL; + } + data = malloc(TEST_DATA_SIZE); + if (!data) { + fclose(f); + return NULL; + } + *size_out = fread(data, 1, TEST_DATA_SIZE, f); + fclose(f); + return data; +} + +// Read entire file via hFILE +static unsigned char *hfile_read_all(const char *url, size_t *size_out) +{ + hFILE *fp; + unsigned char *buf; + size_t total = 0; + ssize_t n; + + fp = hopen(url, "r"); + if (!fp) + return NULL; + + buf = malloc(TEST_DATA_SIZE + 1024); + if (!buf) { + hclose_abruptly(fp); + return NULL; + } + + while ((n = hread(fp, buf + total, 4096)) > 0) { + total += n; + if (total > TEST_DATA_SIZE + 512) + break; + } + + if (n < 0) { + free(buf); + hclose_abruptly(fp); + *size_out = 0; + return NULL; + } + + if (hclose(fp) != 0) + perror("hclose"); + *size_out = total; + return buf; +} + +// Test 1: Normal transfer +static void test_normal_transfer(void) +{ + const char *name = "Normal transfer"; + pid_t pid; + int port; + char url[256]; + unsigned char *got, *expected; + size_t got_size, exp_size; + + pid = start_server("normal", 0, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + got = hfile_read_all(url, &got_size); + expected = read_expected_data(&exp_size); + + if (!got) { + FAIL(name, "hfile_read_all returned NULL, errno=%d", errno); + } else if (got_size != exp_size) { + FAIL(name, "size mismatch: got %zu, expected %zu", got_size, exp_size); + } else if (memcmp(got, expected, exp_size) != 0) { + FAIL(name, "data mismatch"); + } else { + PASS(name); + } + + free(got); + free(expected); + stop_server(pid); +} + +// Test 2: 503 retry succeeds +static void test_503_retry(void) +{ + const char *name = "503 retry succeeds"; + pid_t pid; + int port; + char url[256]; + unsigned char *got, *expected; + size_t got_size, exp_size; + + pid = start_server("503_then_ok", 2, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "3", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + got = hfile_read_all(url, &got_size); + expected = read_expected_data(&exp_size); + + if (!got) { + FAIL(name, "hfile_read_all returned NULL, errno=%d", errno); + } else if (got_size != exp_size) { + FAIL(name, "size mismatch: got %zu, expected %zu", got_size, exp_size); + } else if (memcmp(got, expected, exp_size) != 0) { + FAIL(name, "data mismatch"); + } else { + PASS(name); + } + + free(got); + free(expected); + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +// Test 3: 429 retry succeeds +static void test_429_retry(void) +{ + const char *name = "429 retry succeeds"; + pid_t pid; + int port; + char url[256]; + unsigned char *got, *expected; + size_t got_size, exp_size; + + pid = start_server("429_then_ok", 2, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "3", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + got = hfile_read_all(url, &got_size); + expected = read_expected_data(&exp_size); + + if (!got) { + FAIL(name, "hfile_read_all returned NULL, errno=%d", errno); + } else if (got_size != exp_size) { + FAIL(name, "size mismatch: got %zu, expected %zu", got_size, exp_size); + } else if (memcmp(got, expected, exp_size) != 0) { + FAIL(name, "data mismatch"); + } else { + PASS(name); + } + + free(got); + free(expected); + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +// Test 4: Connection drop retry +static void test_drop_mid_transfer(void) +{ + const char *name = "Connection drop retry"; + pid_t pid; + int port; + char url[256]; + unsigned char *got, *expected; + size_t got_size, exp_size; + + pid = start_server("drop_mid_transfer", 2, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "5", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + got = hfile_read_all(url, &got_size); + expected = read_expected_data(&exp_size); + + if (!got) { + FAIL(name, "hfile_read_all returned NULL, errno=%d", errno); + } else if (got_size != exp_size) { + FAIL(name, "size mismatch: got %zu, expected %zu", got_size, exp_size); + } else if (memcmp(got, expected, exp_size) != 0) { + FAIL(name, "data mismatch"); + } else { + PASS(name); + } + + free(got); + free(expected); + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +// Test 5: 404 not retried +static void test_404_no_retry(void) +{ + const char *name = "404 not retried"; + pid_t pid; + int port; + char url[256]; + hFILE *fp; + + pid = start_server("404", 0, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "3", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + fp = hopen(url, "r"); + if (fp != NULL) { + FAIL(name, "hopen should have failed for 404"); + hclose_abruptly(fp); + } else if (errno != ENOENT) { + FAIL(name, "expected ENOENT, got errno=%d (%s)", errno, strerror(errno)); + } else { + PASS(name); + } + + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +// Test 6: Retry exhaustion +static void test_retry_exhaustion(void) +{ + const char *name = "Retry exhaustion"; + pid_t pid; + int port; + char url[256]; + hFILE *fp; + + pid = start_server("503_then_ok", 999, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "2", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + fp = hopen(url, "r"); + if (fp != NULL) { + FAIL(name, "hopen should have failed after retry exhaustion"); + hclose_abruptly(fp); + } else { + PASS(name); + } + + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +// Test 7: Retry disabled +static void test_retry_disabled(void) +{ + const char *name = "Retry disabled"; + pid_t pid; + int port; + char url[256]; + hFILE *fp; + + pid = start_server("503_then_ok", 1, &port); + if (pid < 0) { FAIL(name, "could not start server"); return; } + + snprintf(url, sizeof(url), "http://127.0.0.1:%d/data", port); + setenv("HTS_RETRY_MAX", "0", 1); + setenv("HTS_RETRY_DELAY", "50", 1); + + fp = hopen(url, "r"); + if (fp != NULL) { + FAIL(name, "hopen should have failed with retries disabled"); + hclose_abruptly(fp); + } else { + PASS(name); + } + + unsetenv("HTS_RETRY_MAX"); + unsetenv("HTS_RETRY_DELAY"); + stop_server(pid); +} + +int main(void) +{ + // Check python3 is available + if (system("python3 --version >/dev/null 2>&1") != 0) { + fprintf(stderr, "python3 not found, skipping libcurl retry tests\n"); + return 0; // Skip rather than fail + } + + // Check that HTTP URLs are supported (i.e. libcurl backend is loaded). + // Try opening a URL that will fail to connect — if errno is + // EPROTONOSUPPORT the http:// scheme isn't registered. + { + hFILE *probe; + setenv("HTS_RETRY_MAX", "0", 1); + probe = hopen("http://0.0.0.0:1/probe", "r"); + unsetenv("HTS_RETRY_MAX"); + if (probe) { + hclose_abruptly(probe); + } else if (errno == EPROTONOSUPPORT) { + fprintf(stderr, "HTTP not supported, skipping libcurl retry tests\n"); + return 0; + } + // ECONNREFUSED or similar means libcurl is working, continue + } + + generate_test_data(); + + fprintf(stderr, "test_hfile_libcurl:\n"); + + test_normal_transfer(); + test_503_retry(); + test_429_retry(); + test_drop_mid_transfer(); + test_404_no_retry(); + test_retry_exhaustion(); + test_retry_disabled(); + + // Clean up test data + unlink("test/" TEST_DATA_FILE); + + if (failures > 0) { + fprintf(stderr, "%d test(s) FAILED\n", failures); + return EXIT_FAILURE; + } + + fprintf(stderr, "All tests passed.\n"); + return EXIT_SUCCESS; +} + +#endif /* !_WIN32 */