Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for selecting the Metal GPU on Intel or Apple Silicon Macs via the --main-gpu index #8962

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
#endif

#ifdef GGML_USE_METAL
new_clip->backend = ggml_backend_metal_init();
new_clip->backend = ggml_backend_metal_init(0);
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
#endif

Expand Down
4 changes: 2 additions & 2 deletions examples/rpc/rpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ static ggml_backend_t create_backend() {
}
#elif GGML_USE_METAL
fprintf(stderr, "%s: using Metal backend\n", __func__);
backend = ggml_backend_metal_init();
backend = ggml_backend_metal_init(0);
if (!backend) {
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
fprintf(stderr, "%s: ggml_backend_metal_init(0) failed\n", __func__);
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion ggml/include/ggml-metal.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ extern "C" {

GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);

GGML_API ggml_backend_t ggml_backend_metal_init(void);
GGML_API ggml_backend_t ggml_backend_metal_init(int deviceIndex);

GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);

Expand Down
69 changes: 54 additions & 15 deletions ggml/src/ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#import <Foundation/Foundation.h>

#import <Metal/Metal.h>
#import <sys/sysctl.h>

#undef MIN
#undef MAX
Expand Down Expand Up @@ -293,21 +294,59 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
return data;
}

static struct ggml_backend_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
static struct ggml_backend_metal_context * ggml_metal_init( int deviceIndex, int n_cb) {
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);

#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif

// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);

size_t size_arm;
NSMutableString *logMessages = [NSMutableString string];

// Check for Apple Silicon (M1, M2, etc.)
if (sysctlbyname("hw.optional.arm64", NULL, &size_arm, NULL, 0) == 0 && size_arm == 4) {
int isAppleSilicon = 0;
sysctlbyname("hw.optional.arm64", &isAppleSilicon, &size_arm, NULL, 0);
if (isAppleSilicon) {
[logMessages appendString:@"This Mac is running on an Apple Silicon (M) Series processor."];
} else {
[logMessages appendString:@"This Mac is running on an Intel processor."];
}
} else {
[logMessages appendString:@"This Mac is running on an Intel processor."];
}

GGML_METAL_LOG_INFO("%s'%s'\n", __func__, [logMessages UTF8String]);
// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
NSString *defaultDeviceName = device.name;
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
GGML_METAL_LOG_INFO("%s: Passed GPU at index %d:\n", __func__, deviceIndex);
NSArray<id<MTLDevice>> *alldevices = MTLCopyAllDevices();
// Check if passed device index is within range
if (deviceIndex<=(alldevices.count -1)) {
for (NSUInteger i = 0; i < alldevices.count; i++) {
id<MTLDevice> selectgpu = alldevices[i];
NSString *deviceName = selectgpu.name;
if (i == deviceIndex) {
if (![defaultDeviceName isEqualToString:deviceName]) {
device = selectgpu;
GGML_METAL_LOG_INFO("%s: Picking Index GPU Name: %s\n", __func__, [ deviceName UTF8String]);
}else{
[alldevices release];
}
break;
}
}

}

// Configure context
struct ggml_backend_metal_context * ctx = malloc(sizeof(struct ggml_backend_metal_context));
Expand Down Expand Up @@ -3238,8 +3277,8 @@ static ggml_guid_t ggml_backend_metal_guid(void) {
return &guid;
}

ggml_backend_t ggml_backend_metal_init(void) {
struct ggml_backend_metal_context * ctx = ggml_metal_init(GGML_DEFAULT_N_THREADS);
ggml_backend_t ggml_backend_metal_init(int deviceIndex) {
struct ggml_backend_metal_context * ctx = ggml_metal_init(deviceIndex,GGML_DEFAULT_N_THREADS);
if (ctx == NULL) {
GGML_METAL_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
return NULL;
Expand Down Expand Up @@ -3295,8 +3334,8 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning

GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) {
    // Registry entry point for the Metal backend. The desired GPU index is
    // smuggled through user_data as a pointer-sized integer: 0 selects the
    // first (default) Metal device.
    //
    // NOTE(review): confirm where the registration site sets user_data — if it
    // is left NULL, this always resolves to device index 0, which preserves
    // the pre-change behavior but silently ignores --main-gpu for backends
    // created through the registry.
    return ggml_backend_metal_init((int) (intptr_t) user_data);

    GGML_UNUSED(params);
}
2 changes: 1 addition & 1 deletion src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16588,7 +16588,7 @@ struct llama_context * llama_new_context_with_model(
// initialize backends
#if defined(GGML_USE_METAL)
if (model->n_gpu_layers > 0) {
ctx->backend_metal = ggml_backend_metal_init();
ctx->backend_metal = ggml_backend_metal_init(model->main_gpu);
if (ctx->backend_metal == nullptr) {
LLAMA_LOG_ERROR("%s: failed to initialize Metal backend\n", __func__);
llama_free(ctx);
Expand Down