Skip to content

add LLamaReranker and tests #1150

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2105dc3
add support for linux-arm64
nipeone Mar 27, 2025
bc4dde8
update compile.yml
nipeone Mar 27, 2025
aeef2eb
update compile.yml DGGML_CPU_ARM_ARCH=armv8-a
nipeone Mar 27, 2025
80d75d9
update runtime.targets
nipeone Mar 27, 2025
5996b40
Merge branch 'SciSharp:master' into master
nipeone Apr 2, 2025
6f4c53c
add LLamaReranker and tests
nipeone Apr 3, 2025
a69f814
Merge branch 'feature-llamareranker'
nipeone Apr 3, 2025
6f8b7ce
Merge branch 'SciSharp:master' into master
nipeone Apr 7, 2025
31c1218
Merge branch 'SciSharp:master' into master
nipeone Apr 11, 2025
15c5247
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 11, 2025
c604359
optimize LLamaReranker function
nipeone Apr 11, 2025
d99670c
fix Reranking if documents is too large
nipeone Apr 11, 2025
05677fe
fix Reranking if document contains null
nipeone Apr 15, 2025
6becd43
Merge branch 'SciSharp:master' into master
nipeone Apr 18, 2025
4258cc1
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 18, 2025
c62980f
Apply suggestions from code review
martindevans Apr 21, 2025
dfb3cc9
Update .github/workflows/compile.yml
martindevans Apr 21, 2025
47f90c4
Merge branch 'master' into master
martindevans Apr 21, 2025
8d61a92
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 21, 2025
e1939eb
Merge branch 'SciSharp:master' into master
nipeone Apr 29, 2025
49ae0a8
Merge branch 'SciSharp:master' into feature-llamareranker
nipeone Apr 29, 2025
474cfd1
Merge branch 'SciSharp:master' into master
nipeone May 6, 2025
a53f503
Merge branch 'master' of https://github.com/nipeone/LLamaSharp
nipeone May 6, 2025
9ed7378
Merge upstream/master and resolve conflicts
nipeone May 6, 2025
37bb3c3
Merge branch 'master' into feature-llamareranker
nipeone May 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,25 @@ jobs:
include:
- build: 'noavx'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
os: ubuntu-24.04
arch: x64
- build: 'avx2'
defines: ''
os: ubuntu-24.04
arch: x64
- build: 'avx'
defines: '-DGGML_AVX2=OFF'
os: ubuntu-24.04
arch: x64
- build: 'avx512'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-24.04
os: ubuntu-24.04
arch: x64
- build: 'aarch64'
defines: '-DGGML_NATIVE=OFF -DGGML_CPU_AARCH64=ON -DGGML_CPU_ARM_ARCH=armv8-a'
os: ubuntu-24.04-arm
arch: arm64
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -52,28 +64,28 @@ jobs:
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllama.so
name: llama-bin-linux-${{ matrix.build }}-x64.so
name: llama-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml.so
name: ggml-bin-linux-${{ matrix.build }}-x64.so
name: ggml-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-base.so
name: ggml-base-bin-linux-${{ matrix.build }}-x64.so
name: ggml-base-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/bin/libggml-cpu.so
name: ggml-cpu-bin-linux-${{ matrix.build }}-x64.so
name: ggml-cpu-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
path: ./build/bin/libllava_shared.so
name: llava-bin-linux-${{ matrix.build }}-x64.so
name: llava-bin-linux-${{ matrix.build }}-${{ matrix.arch }}.so
if-no-files-found: error

compile-musl:
Expand Down Expand Up @@ -601,7 +613,7 @@ jobs:
- name: Rearrange Files
run: |
# Make all directories at once
mkdir --parents deps/{noavx,avx,avx2,avx512,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}
mkdir --parents deps/{noavx,avx,avx2,avx512,linux-arm64,musl-noavx,musl-avx,musl-avx2,musl-avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/noavx/libggml.so
Expand All @@ -628,6 +640,13 @@ jobs:
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

# Arm64
cp artifacts/ggml-bin-linux-aarch64-arm64.so/libggml.so deps/linux-arm64/libggml.so
cp artifacts/ggml-base-bin-linux-aarch64-arm64.so/libggml-base.so deps/linux-arm64/libggml-base.so
cp artifacts/ggml-cpu-bin-linux-aarch64-arm64.so/libggml-cpu.so deps/linux-arm64/libggml-cpu.so
cp artifacts/llama-bin-linux-aarch64-arm64.so/libllama.so deps/linux-arm64/libllama.so
cp artifacts/llava-bin-linux-aarch64-arm64.so/libllava_shared.so deps/linux-arm64/libllava_shared.so

# Musl
cp artifacts/ggml-bin-musl-noavx-x64.so/libggml.so deps/musl-noavx/libggml.so
cp artifacts/ggml-base-bin-musl-noavx-x64.so/libggml-base.so deps/musl-noavx/libggml-base.so
Expand Down
1 change: 1 addition & 0 deletions LLama.Unittest/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ internal static class Constants
public static readonly string GenerativeModelPath = "Models/Llama-3.2-1B-Instruct-Q4_0.gguf";
public static readonly string GenerativeModelPath2 = "Models/smollm-360m-instruct-add-basics-q8_0.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";
public static readonly string RerankingModelPath = "Models/jina-reranker-v1-tiny-en-FP16.gguf";

public static readonly string LLavaModelPath = "Models/llava-v1.6-mistral-7b.Q3_K_XS.gguf";
public static readonly string LLavaMmpPath = "Models/mmproj-model-f16.gguf";
Expand Down
9 changes: 9 additions & 0 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
<LocalFileName>smollm-360m-instruct-add-basics-q8_0.gguf</LocalFileName>
</DownloadFileItem>

<DownloadFileItem Include="jina-reranker-v1-tiny-en-FP16.gguf">
<SourceUrl>https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-FP16.gguf</SourceUrl>
<DestinationFolder>Models</DestinationFolder>
<LocalFileName>jina-reranker-v1-tiny-en-FP16.gguf</LocalFileName>
</DownloadFileItem>

<DownloadFileItem Include="llava-v1.6-mistral-7b">
<SourceUrl>https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf</SourceUrl>
<DestinationFolder>Models</DestinationFolder>
Expand Down Expand Up @@ -130,6 +136,9 @@
<None Update="Models\Llama-3.2-1B-Instruct-Q4_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\jina-reranker-v1-tiny-en-FP16.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Models\smollm-360m-instruct-add-basics-q8_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
74 changes: 74 additions & 0 deletions LLama.Unittest/LLamaRerankerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using LLama.Common;
using LLama.Extensions;
using LLama.Native;
using Microsoft.Extensions.AI;
using System.Runtime.InteropServices;
using Xunit.Abstractions;

namespace LLama.Unittest;

/// <summary>
/// Tests for <see cref="LLamaReranker"/>: scoring the relevance of candidate
/// documents against a query with a reranking model.
/// </summary>
public sealed class LLamaRerankerTests
    : IDisposable
{
    private readonly ITestOutputHelper _testOutputHelper;
    private readonly LLamaReranker _reranker;

    public LLamaRerankerTests(ITestOutputHelper testOutputHelper)
    {
        _testOutputHelper = testOutputHelper;

        var @params = new ModelParams(Constants.RerankingModelPath)
        {
            // ContextSize = 0: use the model's own default context size.
            ContextSize = 0,
            // Reranking requires rank pooling so each sequence yields a single score.
            PoolingType = LLamaPoolingType.Rank,
            GpuLayerCount = Constants.CIGpuLayerCount,
        };

        // NOTE(review): disposing the weights here matches the original code and
        // assumes LLamaReranker retains its own handle to the model — confirm.
        using var weights = LLamaWeights.LoadFromFile(@params);
        _reranker = new LLamaReranker(weights, @params);
    }

    /// <summary>
    /// Release the native context held by the reranker (xUnit calls this after
    /// each test, since the class is constructed per test).
    /// </summary>
    public void Dispose()
    {
        _reranker.Dispose();
    }

    [Fact]
    public async Task CompareRerankingScore()
    {
        var input = "what is panda?";
        var documents = new string[] {
            "hi",
            "it's a bear",
            string.Join(", ","The giant panda (Ailuropoda melanoleuca)",
            "sometimes called a panda bear or simply panda",
            "is a bear species endemic to China.")
        };
        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: false);

        // One score per input document, in the same order.
        Assert.Equal(documents.Length, scores.Count);

        _testOutputHelper.WriteLine($"Rerank score 0: {scores[0]:F4}");
        _testOutputHelper.WriteLine($"Rerank score 1: {scores[1]:F4}");
        _testOutputHelper.WriteLine($"Rerank score 2: {scores[2]:F4}");
    }

    [Fact]
    public async Task MostRelevantDocument()
    {
        var input = "what is panda?";
        var documents = new string[] {
            "hi",
            "it's a bear",
            string.Join(", ","The giant panda (Ailuropoda melanoleuca)",
            "sometimes called a panda bear or simply panda",
            "is a bear species endemic to China.")
        };
        var scores = await _reranker.GetRelevanceScores(input, documents, normalize: true);

        Assert.NotNull(scores);
        Assert.Equal(documents.Length, scores.Count);

        // The document actually describing a panda should score highest.
        int maxIndex = scores.Select((score, index) => (score, index))
                             .MaxBy(x => x.score)
                             .index;

        var maxScoreDocument = documents[maxIndex];
        Assert.Equal(documents[2], maxScoreDocument);
    }
}
37 changes: 37 additions & 0 deletions LLama.Unittest/Native/SafeLlamaModelHandleVocabularyTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using System.Text;
using System.Xml.Linq;
using LLama.Common;
using LLama.Extensions;
using Microsoft.Extensions.Logging;


namespace LLama.Unittest.Native;

/// <summary>
/// Tests for vocabulary access on the model handle: converting special tokens
/// (BOS/EOS) back to their string form.
/// </summary>
public class SafeLlamaModelHandleVocabularyTests
    : IDisposable
{
    private readonly LLamaWeights _model;

    public SafeLlamaModelHandleVocabularyTests()
    {
        var @params = new ModelParams(Constants.RerankingModelPath)
        {
            // ContextSize = 0: use the model's own default context size.
            ContextSize = 0,
            PoolingType = LLama.Native.LLamaPoolingType.Rank,
            GpuLayerCount = Constants.CIGpuLayerCount
        };
        _model = LLamaWeights.LoadFromFile(@params);
    }

    /// <summary>
    /// Release the native model handle (xUnit calls this after each test).
    /// </summary>
    public void Dispose()
    {
        _model.Dispose();
    }

    [Fact]
    public void GetLLamaTokenString()
    {
        var bos = _model.Vocab.BOS;
        var eos = _model.Vocab.EOS;

        var bosStr = _model.Vocab.LLamaTokenToString(bos, true);
        var eosStr = _model.Vocab.LLamaTokenToString(eos, true);

        // The jina reranker model uses classic sentencepiece-style markers.
        Assert.Equal("<s>", bosStr);
        Assert.Equal("</s>", eosStr);
    }
}
Loading
Loading