diff --git a/llama.cpp/ggml-backend.c b/llama.cpp/ggml-backend.c
index bd4aeacdc4..ce82fae628 100644
--- a/llama.cpp/ggml-backend.c
+++ b/llama.cpp/ggml-backend.c
@@ -2248,7 +2248,7 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
 }
 
 GGML_CALL static void system_exit(int rc) {
-    exit(rc);
+    pthread_exit(0);
 }
 
 GGML_CALL static void system_free(void *p) {
diff --git a/llamafile/server/main.cpp b/llamafile/server/main.cpp
index 7274fb49c4..c57882b17f 100644
--- a/llamafile/server/main.cpp
+++ b/llamafile/server/main.cpp
@@ -105,22 +105,6 @@ main(int argc, char* argv[])
     for (int i = 0; i < FLAG_workers; ++i)
         npassert(!g_server->spawn());
 
-    // install security
-    if (!FLAG_unsecure) {
-        const char* promises;
-        if (FLAG_www_root) {
-            promises = "stdio anet rpath";
-        } else {
-            promises = "stdio anet";
-        }
-        if (pledge(0, 0)) {
-            SLOG("warning: this OS doesn't support pledge() security");
-        } else if (pledge("stdio anet", 0)) {
-            perror("pledge");
-            exit(1);
-        }
-    }
-
     // run server
     signals_init();
     llama_backend_init();
diff --git a/llamafile/server/worker.cpp b/llamafile/server/worker.cpp
index b04741a4ea..a016c62218 100644
--- a/llamafile/server/worker.cpp
+++ b/llamafile/server/worker.cpp
@@ -24,8 +24,9 @@
 #include "llamafile/server/tokenbucket.h"
 #include "llamafile/threadlocal.h"
 #include "llamafile/trust.h"
-#include
+#include
 #include
+#include
 #include
 #include
 
@@ -135,6 +136,28 @@ Worker::handle()
 void
 Worker::run()
 {
+    if (!FLAG_unsecure) {
+        static std::atomic<bool> once;
+        if (llamafile_has_gpu()) {
+            if (!once.exchange(true))
+                SLOG("warning: gpu mode disables pledge security");
+        } else {
+            const char* promises;
+            if (FLAG_www_root && !startswith(FLAG_www_root, "/zip/")) {
+                promises = "stdio anet rpath";
+            } else {
+                promises = "stdio anet";
+            }
+            if (pledge(0, 0)) {
+                if (!once.exchange(true))
+                    SLOG("warning: this OS doesn't support pledge() security");
+            } else if (pledge(promises, 0)) {
+                perror("pledge");
+                exit(1);
+            }
+        }
+    }
+
     server_->lock();
     dll_make_first(&server_->idle_workers, &elem_);
     server_->worker_count.fetch_add(1, std::memory_order_acq_rel);