From ec876b691c53aa3b6b92c01ecebef91a3026e1fd Mon Sep 17 00:00:00 2001 From: rasapala Date: Mon, 15 Jun 2026 15:47:55 +0200 Subject: [PATCH 1/5] Networ proff the pull tests --- src/test/pull_hf_model_test.cpp | 119 ++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index 6e0c6bd554..71fc0ee323 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -499,6 +499,23 @@ class HfPull : public TestWithTempDir { ::SetUpServerForDownloadAndStart(this->t, this->server, sourceModel, downloadPath, task, timeoutSeconds); } + int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { + server.setShutdownRequest(0); + char* argv[] = {(char*)"ovms", + (char*)"--pull", + (char*)"--source_model", + (char*)sourceModel.c_str(), + (char*)"--model_repository_path", + (char*)modelRepositoryPath.c_str(), + (char*)"--task", + (char*)pullTask.c_str()}; + int argc = 8; + int exitCode = server.start(argc, argv); + server.setShutdownRequest(1); + server.setShutdownRequest(0); + return exitCode; + } + void TearDown() { server.setShutdownRequest(1); if (t) @@ -512,6 +529,9 @@ class HfPull : public TestWithTempDir { class HfPullCache : public HfPull { protected: + static constexpr int CACHE_PULL_MAX_ATTEMPTS = 3; + static constexpr int CACHE_PULL_RETRY_DELAY_MS = 1000; + static std::once_flag cacheInitFlag; static std::unique_ptr cacheDir; static std::string cachedRepositoryPath; @@ -531,14 +551,99 @@ class HfPullCache : public HfPull { std::string cacheDownloadPath = ovms::FileSystem::joinPath({cacheDir->dir.string(), "repository"}); std::string pullTask = this->task; - this->ServerPullHfModel(sourceModelName, cacheDownloadPath, pullTask); - server.setShutdownRequest(1); - if (t) - t->join(); - server.setShutdownRequest(0); + auto buildModelBasePath = [&](const std::string& repositoryRoot) { + return ovms::FileSystem::joinPath({repositoryRoot, MODEL_NAMESPACE, MODEL_ID}); + }; + auto hasCompleteCache = [&](const std::string& repositoryRoot) { + const std::string modelBase = buildModelBasePath(repositoryRoot); + std::error_code ec; + const bool hasModel = std::filesystem::exists(ovms::FileSystem::appendSlash(modelBase) + "openvino_model.bin", ec); + if (ec) + return false; + const bool hasDetok = std::filesystem::exists(ovms::FileSystem::appendSlash(modelBase) + "openvino_detokenizer.bin", ec); + if (ec) + return false; + const bool hasTok = std::filesystem::exists(ovms::FileSystem::appendSlash(modelBase) + "openvino_tokenizer.bin", ec); + if (ec) + return false; + const bool hasTokModel = std::filesystem::exists(ovms::FileSystem::appendSlash(modelBase) + "tokenizer.model", ec); + if (ec) + return false; + const bool hasGraph = std::filesystem::exists(ovms::FileSystem::appendSlash(modelBase) + "graph.pbtxt", ec); + if (ec) + return false; + + // Validate expected known sizes for deterministic cache integrity checks. + const bool modelSizeOk = hasModel && (std::filesystem::file_size(ovms::FileSystem::appendSlash(modelBase) + "openvino_model.bin", ec) == OPENVINO_MODEL_BIN_FULL_SIZE_BYTES); + if (ec) + return false; + const bool detokSizeOk = hasDetok && (std::filesystem::file_size(ovms::FileSystem::appendSlash(modelBase) + "openvino_detokenizer.bin", ec) == OPENVINO_DETOKENIZER_BIN_FULL_SIZE_BYTES); + if (ec) + return false; + const bool tokSizeOk = hasTok && (std::filesystem::file_size(ovms::FileSystem::appendSlash(modelBase) + "openvino_tokenizer.bin", ec) == OPENVINO_TOKENIZER_BIN_FULL_SIZE_BYTES); + if (ec) + return false; + const bool tokModelSizeOk = hasTokModel && (std::filesystem::file_size(ovms::FileSystem::appendSlash(modelBase) + "tokenizer.model", ec) == TOKENIZER_MODEL_FULL_SIZE_BYTES); + if (ec) + return false; + + const bool hasRepoMarker = std::filesystem::exists(ovms::libgit2::getLfsWipMarkerPath(repositoryRoot), ec); + if (ec) + return false; + const bool hasModelMarker = std::filesystem::exists(ovms::libgit2::getLfsWipMarkerPath(modelBase), ec); + if (ec) + return false; + + return hasGraph && modelSizeOk && detokSizeOk && tokSizeOk && tokModelSizeOk && !hasRepoMarker && !hasModelMarker; + }; + auto looksLikeRecoverableNetworkFailure = [&](const std::string& repositoryRoot) { + const std::string modelBase = buildModelBasePath(repositoryRoot); + std::error_code ec; + const bool modelBaseExists = std::filesystem::exists(modelBase, ec); + if (ec || !modelBaseExists) + return false; + + // Interrupted network/LFS transfers usually leave partial files or marker traces. + const bool hasRepoMarker = std::filesystem::exists(ovms::libgit2::getLfsWipMarkerPath(repositoryRoot), ec); + if (ec) + return false; + const bool hasModelMarker = std::filesystem::exists(ovms::libgit2::getLfsWipMarkerPath(modelBase), ec); + if (ec) + return false; + if (hasRepoMarker || hasModelMarker) + return true; + + const std::string modelBin = ovms::FileSystem::appendSlash(modelBase) + "openvino_model.bin"; + const bool modelExists = std::filesystem::exists(modelBin, ec); + if (ec) + return false; + if (!modelExists) + return true; + + const std::uintmax_t modelSize = std::filesystem::file_size(modelBin, ec); + if (ec) + return false; + return (modelSize > 0) && (modelSize < OPENVINO_MODEL_BIN_FULL_SIZE_BYTES); + }; + + int lastExitCode = EXIT_FAILURE; + for (int attempt = 1; attempt <= CACHE_PULL_MAX_ATTEMPTS; ++attempt) { + lastExitCode = this->RunPullHfModelAndGetCode(sourceModelName, cacheDownloadPath, pullTask); + if ((lastExitCode == EXIT_SUCCESS) && hasCompleteCache(cacheDownloadPath)) { + cachedRepositoryPath = cacheDownloadPath; + ASSERT_TRUE(std::filesystem::exists(cachedRepositoryPath)); + return; + } - cachedRepositoryPath = cacheDownloadPath; - ASSERT_TRUE(std::filesystem::exists(cachedRepositoryPath)); + const bool recoverable = looksLikeRecoverableNetworkFailure(cacheDownloadPath) || !hasCompleteCache(cacheDownloadPath); + if (!recoverable || (attempt == CACHE_PULL_MAX_ATTEMPTS)) { + FAIL() << "Failed to initialize shared HF cache after " << attempt + << " attempt(s). Last exit code: " << lastExitCode + << ". Cache path: " << cacheDownloadPath; + } + SPDLOG_WARN("Shared HF cache initialization attempt {} failed with exit code {}. Retrying pull.", attempt, lastExitCode); + std::this_thread::sleep_for(std::chrono::milliseconds(CACHE_PULL_RETRY_DELAY_MS)); + } }); } From dc3ab85a75cca27acea066f4ca9ab3208ab591bf Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Mon, 15 Jun 2026 16:27:38 +0200 Subject: [PATCH 2/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/test/pull_hf_model_test.cpp | 38 +++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index 71fc0ee323..b0736f0a22 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -499,22 +499,28 @@ class HfPull : public TestWithTempDir { ::SetUpServerForDownloadAndStart(this->t, this->server, sourceModel, downloadPath, task, timeoutSeconds); } - int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { - server.setShutdownRequest(0); - char* argv[] = {(char*)"ovms", - (char*)"--pull", - (char*)"--source_model", - (char*)sourceModel.c_str(), - (char*)"--model_repository_path", - (char*)modelRepositoryPath.c_str(), - (char*)"--task", - (char*)pullTask.c_str()}; - int argc = 8; - int exitCode = server.start(argc, argv); - server.setShutdownRequest(1); - server.setShutdownRequest(0); - return exitCode; - } +int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { + server.setShutdownRequest(0); + std::vector args = { + "ovms", + "--pull", + "--source_model", + sourceModel, + "--model_repository_path", + modelRepositoryPath, + "--task", + pullTask, + }; + std::vector argv; + argv.reserve(args.size()); + for (auto& a : args) { + argv.push_back(a.data()); + } + const int exitCode = server.start(static_cast(argv.size()), argv.data()); + server.setShutdownRequest(1); + server.setShutdownRequest(0); + return exitCode; +} void TearDown() { server.setShutdownRequest(1); From 26862d172c9fc51b1d0b2a3111c1111699c46028 Mon Sep 17 00:00:00 2001 From: rasapala Date: Mon, 15 Jun 2026 16:30:24 +0200 Subject: [PATCH 3/5] Code review copilot --- src/test/pull_hf_model_test.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b0736f0a22..1cc0833d09 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -635,13 +635,14 @@ class HfPullCache : public HfPull { int lastExitCode = EXIT_FAILURE; for (int attempt = 1; attempt <= CACHE_PULL_MAX_ATTEMPTS; ++attempt) { lastExitCode = this->RunPullHfModelAndGetCode(sourceModelName, cacheDownloadPath, pullTask); - if ((lastExitCode == EXIT_SUCCESS) && hasCompleteCache(cacheDownloadPath)) { + const bool cacheComplete = hasCompleteCache(cacheDownloadPath); + if ((lastExitCode == EXIT_SUCCESS) && cacheComplete) { cachedRepositoryPath = cacheDownloadPath; ASSERT_TRUE(std::filesystem::exists(cachedRepositoryPath)); return; } - const bool recoverable = looksLikeRecoverableNetworkFailure(cacheDownloadPath) || !hasCompleteCache(cacheDownloadPath); + const bool recoverable = looksLikeRecoverableNetworkFailure(cacheDownloadPath) || !cacheComplete; if (!recoverable || (attempt == CACHE_PULL_MAX_ATTEMPTS)) { FAIL() << "Failed to initialize shared HF cache after " << attempt << " attempt(s). Last exit code: " << lastExitCode @@ -654,6 +655,12 @@ class HfPullCache : public HfPull { } void seedCurrentTestRepository() { + ASSERT_FALSE(cachedRepositoryPath.empty()) + << "Shared HF cache was never successfully initialized (call_once completed with a failure). " + "All HfPullCache tests in this process will be unable to seed their working directory."; + ASSERT_TRUE(std::filesystem::exists(cachedRepositoryPath)) + << "Shared HF cache path does not exist on disk: " << cachedRepositoryPath + << ". Cache initialization completed but left no usable directory."; std::error_code ec; std::filesystem::copy(cachedRepositoryPath, testRepositoryPath, From 951b489c0f91f937c4909751fd3331c305c5be58 Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Tue, 16 Jun 2026 10:53:57 +0200 Subject: [PATCH 4/5] Style --- src/test/pull_hf_model_test.cpp | 44 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index 8b5b245987..ed89e999a0 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -499,28 +499,28 @@ class HfPull : public TestWithTempDir { ::SetUpServerForDownloadAndStart(this->t, this->server, sourceModel, downloadPath, task, timeoutSeconds); } -int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { - server.setShutdownRequest(0); - std::vector args = { - "ovms", - "--pull", - "--source_model", - sourceModel, - "--model_repository_path", - modelRepositoryPath, - "--task", - pullTask, - }; - std::vector argv; - argv.reserve(args.size()); - for (auto& a : args) { - argv.push_back(a.data()); - } - const int exitCode = server.start(static_cast(argv.size()), argv.data()); - server.setShutdownRequest(1); - server.setShutdownRequest(0); - return exitCode; -} + int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { + server.setShutdownRequest(0); + std::vector args = { + "ovms", + "--pull", + "--source_model", + sourceModel, + "--model_repository_path", + modelRepositoryPath, + "--task", + pullTask, + }; + std::vector argv; + argv.reserve(args.size()); + for (auto& a : args) { + argv.push_back(a.data()); + } + const int exitCode = server.start(static_cast(argv.size()), argv.data()); + server.setShutdownRequest(1); + server.setShutdownRequest(0); + return exitCode; + } void TearDown() { server.setShutdownRequest(1); From 03730e45ca29fde43dc46ec6f4d7bbfc4a3f0151 Mon Sep 17 00:00:00 2001 From: Rafal Sapala Date: Wed, 17 Jun 2026 15:54:59 +0200 Subject: [PATCH 5/5] Update src/test/pull_hf_model_test.cpp Co-authored-by: Adrian Tobiszewski --- src/test/pull_hf_model_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index ed89e999a0..9fd178ae8c 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -499,7 +499,7 @@ class HfPull : public TestWithTempDir { ::SetUpServerForDownloadAndStart(this->t, this->server, sourceModel, downloadPath, task, timeoutSeconds); } - int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { + static int RunPullHfModelAndGetCode(const std::string& sourceModel, const std::string& modelRepositoryPath, const std::string& pullTask) { server.setShutdownRequest(0); std::vector args = { "ovms",