
Test llava with quantized weights
zcbenz committed Sep 26, 2024
1 parent 6ebc13c commit c960a33
Showing 2 changed files with 8 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -24,8 +24,8 @@ jobs:
 huggingface download --silent Qwen/Qwen2-0.5B
 yarn tsx src/generate.ts --max-tokens=128 Qwen2-0.5B
-huggingface download --silent llava-hf/llava-1.5-7b-hf
-yarn tsx src/generate.ts --max-tokens=128 llava-1.5-7b-hf 'USER: How are you?\nASSISTANT:'
+huggingface download --silent mlx-community/llava-1.5-7b-4bit
+yarn tsx src/generate.ts --max-tokens=128 llava-1.5-7b-4bit 'USER: How are you?\nASSISTANT:'
 publish:
   if: startsWith(github.ref, 'refs/tags/')
8 changes: 6 additions & 2 deletions src/models/llava.ts
@@ -121,8 +121,12 @@ export class Model extends BaseModel {
   // [out_channels, in_channels, kH, KW]
   // MLX Conv2d expects the weight tensor to be of shape:
   // [out_channels, kH, KW, in_channels]
-  if (key.endsWith('patch_embedding.weight'))
-    weights[key] = weights[key].transpose(0, 2, 3, 1);
+  if (key.endsWith('patch_embedding.weight')) {
+    // Some mlx-community models already transposed it for us.
+    const {shape} = weights[key];
+    if (shape[1] != shape[2])
+      weights[key] = weights[key].transpose(0, 2, 3, 1);
+  }
 }
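
The shape check works because ViT patch embeddings are square (kH == kW), so only the untransposed PyTorch layout has differing sizes at indexes 1 and 2. A minimal sketch of that heuristic in isolation, using a hypothetical helper and example shapes for llava's CLIP vision tower (hidden size 1024, 3 input channels, 14x14 patches), not code from this commit:

// PyTorch Conv2d weights: [out_channels, in_channels, kH, kW] -> indexes 1 and 2 differ
// MLX Conv2d weights:     [out_channels, kH, kW, in_channels] -> indexes 1 and 2 are both the patch size
function needsTranspose(shape: number[]): boolean {
  return shape[1] !== shape[2];
}

needsTranspose([1024, 3, 14, 14]);  // true  -> transpose(0, 2, 3, 1) into MLX layout
needsTranspose([1024, 14, 14, 3]);  // false -> checkpoint already ships the MLX layout

The heuristic assumes the number of input channels never equals the patch size, which holds for the 3-channel, 14x14-patch CLIP tower used here.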

