Skip to content

add LLamaReranker and tests #1150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2105dc3
add support for linux-arm64
nipeone Mar 27, 2025
bc4dde8
update compile.yml
nipeone Mar 27, 2025
aeef2eb
update compile.yml DGGML_CPU_ARM_ARCH=armv8-a
nipeone Mar 27, 2025
80d75d9
update runtime.targets
nipeone Mar 27, 2025
5996b40
Merge branch 'SciSharp:master' into master
nipeone Apr 2, 2025
6f4c53c
add LLamaReranker and tests
nipeone Apr 3, 2025
a69f814
Merge branch 'feature-llamareranker'
nipeone Apr 3, 2025
6f8b7ce
Merge branch 'SciSharp:master' into master
nipeone Apr 7, 2025
31c1218
Merge branch 'SciSharp:master' into master
nipeone Apr 11, 2025
15c5247
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 11, 2025
c604359
optimize LLamaReranker function
nipeone Apr 11, 2025
d99670c
fix Reranking if documents is too large
nipeone Apr 11, 2025
05677fe
fix Reranking if document contains null
nipeone Apr 15, 2025
6becd43
Merge branch 'SciSharp:master' into master
nipeone Apr 18, 2025
4258cc1
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 18, 2025
c62980f
Apply suggestions from code review
martindevans Apr 21, 2025
dfb3cc9
Update .github/workflows/compile.yml
martindevans Apr 21, 2025
47f90c4
Merge branch 'master' into master
martindevans Apr 21, 2025
8d61a92
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 21, 2025
e1939eb
Merge branch 'SciSharp:master' into master
nipeone Apr 29, 2025
49ae0a8
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 29, 2025
474cfd1
Merge branch 'SciSharp:master' into master
nipeone May 6, 2025
a53f503
Merge branch 'master' of https://github.com/nipeone/LLamaSharp
nipeone May 6, 2025
9ed7378
Merge upstream/master and resolve conflicts
nipeone May 6, 2025
37bb3c3
Merge branch 'master' into feature-llamareranker
nipeone May 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,25 @@ jobs:
include:
- build: 'noavx'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
os: ubuntu-24.04
arch: x64
- build: 'avx2'
defines: ''
os: ubuntu-24.04
arch: x64
- build: 'avx'
defines: '-DGGML_AVX2=OFF'
os: ubuntu-24.04
arch: x64
- build: 'avx512'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-24.04
os: ubuntu-24.04
arch: x64
- build: 'aarch64'
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
os: ubuntu-24.04-arm
arch: arm64
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -52,28 +64,28 @@ jobs:
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllama.so
name: llama-bin-linux-${{ matrix.build }}-x64.so
name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml.so
name: ggml-bin-linux-${{ matrix.build }}-x64.so
name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-base.so
name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-cpu.so
name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllava_shared.so
name: llava-bin-linux-${{ matrix.build }}-x64.so
name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error

compile-musl:
Expand Down Expand Up @@ -601,7 +613,7 @@ jobs:
- name: Rearrange Files
run: |
# Make all directories at once
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
Expand All @@ -628,6 +640,13 @@ jobs:
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

# Arm64
cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so

# Musl
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
Expand Down
1 change: 1 addition & 0 deletions LLama.Unittest/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ internal static class Constants
public static readonly string GenerativeModelPath = "Models/Llama-3.2-1B-Instruct-Q4_0.gguf";
public static readonly string GenerativeModelPath2 = "Models/smollm-360m-instruct-add-basics-q8_0.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
public static readonly string RerankingModelPath = "Models/jina-reranker-v1-tiny-en-FP16.gguf";

public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
Expand Down
9 changes: 9 additions & 0 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
<LocalFileName>smollm-360m-instruct-add-basics-q8_0.gguf</LocalFileName>
</DownloadFileItem>

<DownloadFileItem Include="jina-reranker-v1-tiny-en-FP16.gguf">
<SourceUrl>https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf</SourceUrl>
<DestinationFolder>Models</DestinationFolder>
<LocalFileName>jina-reranker-v1-tiny-en-FP16.gguf</LocalFileName>
</DownloadFileItem>

<DownloadFileItem Include="llava-v1.6-mistral-7b">
<SourceUrl>https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf</SourceUrl>
<DestinationFolder>Models</DestinationFolder>
Expand Down Expand Up @@ -130,6 +136,9 @@
<None Update="Models\Llama-3.2-1B-Instruct-Q4_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\jina-reranker-v1-tiny-en-FP16.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\smollm-360m-instruct-add-basics-q8_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
74 changes: 74 additions & 0 deletions LLama.Unittest/LLamaRerankerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using LLama.Common;
using LLama.Extensions;
using LLama.Native;
using Microsoft.Extensions.AI;
using System.Runtime.InteropServices;
using Xunit.Abstractions;

namespace LLama.Unittest;

/// <summary>
/// Tests for <see cref="LLamaReranker"/>: scoring the relevance of candidate
/// documents against a query with a reranking model.
/// </summary>
public sealed class LLamaRerankerTests
    : IDisposable
{
    private readonly ITestOutputHelper _testOutputHelper;
    private readonly LLamaReranker _reranker;

    public LLamaRerankerTests(ITestOutputHelper testOutputHelper)
    {
        _testOutputHelper = testOutputHelper;

        var @params = new ModelParams(Constants.RerankingModelPath)
        {
            // ContextSize = 0: use the model's own default context size.
            ContextSize = 0,
            // Reranking requires rank pooling so each sequence yields a single score.
            PoolingType = LLamaPoolingType.Rank,
            GpuLayerCount = Constants.CIGpuLayerCount,
        };

        // NOTE(review): disposing the weights here matches the original code and
        // assumes LLamaReranker retains its own handle to the model — confirm.
        using var weights = LLamaWeights.LoadFromFile(@params);
        _reranker = new LLamaReranker(weights, @params);
    }

    /// <summary>
    /// Release the native context held by the reranker (xUnit calls this after
    /// each test, since the class is constructed per test).
    /// </summary>
    public void Dispose()
    {
        _reranker.Dispose();
    }

    [Fact]
    public async Task CompareRerankingScore()
    {
        var input = "what is panda?";
        var documents = new string[] {
            "hi",
            "it's a bear",
            string.Join(", ","The giant panda (Ailuropoda melanoleuca)",
            "sometimes called a panda bear or simply panda",
            "is a bear species endemic to China.")
        };
        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: false);

        // One score per input document, in the same order.
        Assert.Equal(documents.Length, scores.Count);

        _testOutputHelper.WriteLine($"Rerank score 0: {scores[0]:F4}");
        _testOutputHelper.WriteLine($"Rerank score 1: {scores[1]:F4}");
        _testOutputHelper.WriteLine($"Rerank score 2: {scores[2]:F4}");
    }

    [Fact]
    public async Task MostRelevantDocument()
    {
        var input = "what is panda?";
        var documents = new string[] {
            "hi",
            "it's a bear",
            string.Join(", ","The giant panda (Ailuropoda melanoleuca)",
            "sometimes called a panda bear or simply panda",
            "is a bear species endemic to China.")
        };
        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: true);

        Assert.NotNull(scores);
        Assert.Equal(documents.Length, scores.Count);

        // The document actually describing a panda should score highest.
        int maxIndex = scores.Select((score, index) => (score, index))
                             .MaxBy(x => x.score)
                             .index;

        var maxScoreDocument = documents[maxIndex];
        Assert.Equal(documents[2], maxScoreDocument);
    }
}
37 changes: 37 additions & 0 deletions LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using System.Text;
using System.Xml.Linq;
using LLama.Common;
using LLama.Extensions;
using Microsoft.Extensions.Logging;


namespace LLama.Unittest.Native;

/// <summary>
/// Tests for vocabulary access on the model handle: converting special tokens
/// (BOS/EOS) back to their string form.
/// </summary>
public class SafeLlamaModelHandleVocabularyTests
    : IDisposable
{
    private readonly LLamaWeights _model;

    public SafeLlamaModelHandleVocabularyTests()
    {
        var @params = new ModelParams(Constants.RerankingModelPath)
        {
            // ContextSize = 0: use the model's own default context size.
            ContextSize = 0,
            PoolingType = LLama.Native.LLamaPoolingType.Rank,
            GpuLayerCount = Constants.CIGpuLayerCount
        };
        _model = LLamaWeights.LoadFromFile(@params);
    }

    /// <summary>
    /// Release the native model handle (xUnit calls this after each test).
    /// </summary>
    public void Dispose()
    {
        _model.Dispose();
    }

    [Fact]
    public void GetLLamaTokenString()
    {
        var bos = _model.Vocab.BOS;
        var eos = _model.Vocab.EOS;

        var bosStr = _model.Vocab.LLamaTokenToString(bos, true);
        var eosStr = _model.Vocab.LLamaTokenToString(eos, true);

        // The jina reranker model uses classic sentencepiece-style markers.
        Assert.Equal("<s>", bosStr);
        Assert.Equal("</s>", eosStr);
    }
}
Loading
Loading