diff --git a/Makefile b/Makefile
index 6b3d6de..5562814 100644
--- a/Makefile
+++ b/Makefile
@@ -145,6 +145,7 @@ endif
 ifeq ($(BUILD_TYPE),metal)
 	EXTRA_LIBS=
+	CGO_LDFLAGS+="-framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
 	CMAKE_ARGS+=-DLLAMA_METAL=ON
 	EXTRA_TARGETS+=llama.cpp/ggml-metal.o
 endif
 
@@ -163,6 +164,7 @@ $(info I UNAME_P: $(UNAME_P))
 $(info I UNAME_M: $(UNAME_M))
 $(info I CFLAGS: $(CFLAGS))
 $(info I CXXFLAGS: $(CXXFLAGS))
+$(info I CGO_LDFLAGS: $(CGO_LDFLAGS))
 $(info I LDFLAGS: $(LDFLAGS))
 $(info I BUILD_TYPE: $(BUILD_TYPE))
 $(info I CMAKE_ARGS: $(CMAKE_ARGS))
@@ -191,10 +193,10 @@ llama.cpp/k_quants.o: llama.cpp/ggml.o
 	cd build && cp -rf CMakeFiles/ggml.dir/k_quants.c.o ../llama.cpp/k_quants.o
 
 llama.cpp/llama.o:
-	$(MAKE) -C llama.cpp llama.o
+	cd build && make llama.o && cp -rf CMakeFiles/llama.dir/llama.cpp.o ../llama.cpp/llama.o
 
 llama.cpp/common.o:
-	$(MAKE) -C llama.cpp common.o
+	cd build && make common && cp -rf examples/CMakeFiles/common.dir/common.cpp.o ../llama.cpp/common.o
 
 binding.o: llama.cpp/ggml.o llama.cpp/llama.o llama.cpp/common.o
 	$(CXX) $(CXXFLAGS) -I./llama.cpp -I./llama.cpp/examples binding.cpp -o binding.o -c $(LDFLAGS)
@@ -208,4 +210,4 @@ clean:
 	rm -rf build
 
 test: libbinding.a
-	@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./...
+	@C_INCLUDE_PATH=${INCLUDE_PATH} CGO_LDFLAGS=${CGO_LDFLAGS} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./...
diff --git a/README.md b/README.md
index cdc0903..b63dd4c 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,9 @@ ggml_opencl: device FP16 support: true
 ```
 BUILD_TYPE=metal make libbinding.a
-CGO_LDFLAGS="-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
+CGO_LDFLAGS="-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go build ./examples/main.go
+cp build/bin/ggml-metal.metal .
+./main -m "/model/path/here" -t 1 -ngl 1
 ```
 
 Enjoy!
 
diff --git a/examples/main.go b/examples/main.go
index 3649f82..1b3fedc 100644
--- a/examples/main.go
+++ b/examples/main.go
@@ -13,8 +13,9 @@ import (
 )
 
 var (
-	threads = 4
-	tokens  = 128
+	threads   = 4
+	tokens    = 128
+	gpulayers = 0
 )
 
 func main() {
@@ -22,6 +23,7 @@
 
 	flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
 	flags.StringVar(&model, "m", "./models/7B/ggml-model-q4_0.bin", "path to q4_0.bin model file to load")
+	flags.IntVar(&gpulayers, "ngl", 0, "Number of GPU layers to use")
 	flags.IntVar(&threads, "t", runtime.NumCPU(), "number of threads to use during computation")
 	flags.IntVar(&tokens, "n", 512, "number of tokens to predict")
 
@@ -30,7 +32,7 @@ func main() {
 		fmt.Printf("Parsing program arguments failed: %s", err)
 		os.Exit(1)
 	}
-	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings)
+	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
 	if err != nil {
 		fmt.Println("Loading the model failed:", err.Error())
 		os.Exit(1)
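
For downstream callers, here is a minimal sketch of how the new `SetGPULayers` option composes with the existing loader options used in `examples/main.go`. It is not part of this diff; the import path (`github.com/go-skynet/go-llama.cpp`) and the model path are assumptions, so adjust them to your setup:

```go
package main

import (
	"fmt"
	"os"

	llama "github.com/go-skynet/go-llama.cpp" // assumed import path; adjust to your module
)

func main() {
	// SetGPULayers controls how many layers are offloaded to the GPU
	// (Metal, when built with BUILD_TYPE=metal); 0, the default in
	// examples/main.go, keeps inference entirely on the CPU.
	l, err := llama.New(
		"/model/path/here", // hypothetical model path
		llama.EnableF16Memory,
		llama.SetContext(128),
		llama.SetGPULayers(1),
	)
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		os.Exit(1)
	}
	_ = l // model is ready for use, as in examples/main.go
}
```

Note the `cp build/bin/ggml-metal.metal .` step added to the README: the Metal backend loads its shader source from `ggml-metal.metal` at runtime, so the file has to sit next to the built binary or Metal initialization will fail.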