Make the default of ngl be -1 #707

Merged: 1 commit, Feb 5, 2025
docs/ramalama.1.md (3 changes: 2 additions & 1 deletion)
@@ -115,7 +115,8 @@ pass --group-add keep-groups to podman (default: False)
Needed to access the gpu on some systems, but has an impact on security, use with caution.

#### **--ngl**
-number of gpu layers (default: 999)
+number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)

#### **--nocontainer**
do not run RamaLama in the default container (default: False)
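The tri-state semantics documented above (-1 auto, 0 CPU-only, 999 maximum offload) can be restated as a short sketch. This is hypothetical code, not taken from RamaLama; the helper name resolve_ngl and the llama.cpp-style --n-gpu-layers flag are assumptions used only for illustration.

# Hypothetical helper, for illustration only; not RamaLama's actual code.
def resolve_ngl(ngl: int, gpu_available: bool) -> int:
    """Map the documented ngl semantics to a concrete layer count.

    -1  -> auto: 999 (all layers) when a GPU is usable, otherwise 0
     0  -> CPU inferencing only
    999 -> offload as many layers as possible
    other positive values pass through unchanged
    """
    if ngl < 0:
        return 999 if gpu_available else 0
    return ngl

# Example: feed the resolved value to a llama.cpp-style backend option.
backend_args = ["--n-gpu-layers", str(resolve_ngl(-1, gpu_available=True))]

With this shape, an explicit 0 or 999 from the user is never second-guessed; only the -1 default triggers auto-selection.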
docs/ramalama.conf (3 changes: 2 additions & 1 deletion)
@@ -50,8 +50,9 @@
#keep_groups = false

# Default number of layers offloaded to the gpu
+# -1 means use whatever is automatically deemed appropriate (0 or 999)
#
-#ngl = 999
+#ngl = -1
Review comment (Member): Explain -1 above
# Specify default port for services to listen on
#
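For context on the commented-out #ngl line above, here is a minimal sketch of how a loader could pick up the value and fall back to the new default of -1. It assumes ramalama.conf parses as TOML with a [ramalama] table and uses an example path; the real loader may differ.

import tomllib  # Python 3.11+

def load_ngl(path: str = "ramalama.conf") -> int:
    """Return the configured ngl value, falling back to the new default of -1."""
    try:
        with open(path, "rb") as f:
            table = tomllib.load(f).get("ramalama", {})
    except FileNotFoundError:
        table = {}
    # A commented-out "#ngl = -1" is invisible to the parser, so the
    # built-in fallback of -1 (auto) applies.
    return table.get("ngl", -1)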
docs/ramalama.conf.5.md (5 changes: 3 additions & 2 deletions)
@@ -92,9 +92,10 @@ RAMALAMA_IMAGE environment variable overrides this field.
Pass `--group-add keep-groups` to podman, when using podman.
In some cases this is needed to access the gpu from a rootless container

-**ngl**=999
+**ngl**=-1
Review comment (Member): Please explain the meaning of -1.
Default number of layers to offload to the gpu
+number of gpu layers, 0 means CPU inferencing, 999 means use max layers (default: -1)
+The default of -1 means use whatever is automatically deemed appropriate (0 or 999)

**port**="8080"

ramalama/cli.py (4 changes: 2 additions & 2 deletions)
@@ -196,8 +196,8 @@ def configure_arguments(parser):
"--ngl",
dest="ngl",
type=int,
-default=config.get("ngl", 999),
-help="Number of layers to offload to the gpu, if available",
+default=config.get("ngl", -1),
+help="Number of layers to offload to the gpu, if available"
)
parser.add_argument(
"--keep-groups",
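The cli.py hunk above only changes the fallback passed to config.get. A small, self-contained sketch of the resulting precedence (an explicit --ngl beats the conf file, which beats the built-in -1); config here is a plain dict standing in for the parsed configuration:

import argparse

config = {}  # stand-in for the parsed ramalama.conf; empty when ngl is unset

parser = argparse.ArgumentParser()
parser.add_argument(
    "--ngl",
    dest="ngl",
    type=int,
    default=config.get("ngl", -1),
    help="Number of layers to offload to the gpu, if available",
)

print(parser.parse_args([]).ngl)              # -1: auto-select 0 or 999
print(parser.parse_args(["--ngl", "0"]).ngl)  # 0: force CPU inferencing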
ramalama/model.py (2 changes: 1 addition & 1 deletion)
@@ -195,7 +195,7 @@ def setup_container(self, args):
def gpu_args(self, args, runner=False):
gpu_args = []
if (
-args.gpu
+args.gpu > 0
or os.getenv("HIP_VISIBLE_DEVICES")
or os.getenv("ASAHI_VISIBLE_DEVICES")
or os.getenv("CUDA_VISIBLE_DEVICES")
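The model.py change tightens the gate on the GPU path: the old bare truthiness test would treat a negative value as true, while the comparison against 0 does not, presumably so that an automatic negative default no longer counts as an explicit GPU request. A hedged, standalone restatement of that check (not RamaLama's full gpu_args implementation) might look like:

import os

def gpu_requested(gpu: int) -> bool:
    """Mirror the condition in the diff: a positive gpu value or any
    vendor visibility variable enables the GPU argument path."""
    return bool(
        gpu > 0
        or os.getenv("HIP_VISIBLE_DEVICES")
        or os.getenv("ASAHI_VISIBLE_DEVICES")
        or os.getenv("CUDA_VISIBLE_DEVICES")
    )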