Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ jobs:

- name: Run pre-commit check
run: make precommit-check
env:
CI: true

- name: Show pre-commit results
if: failure()
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/publish-crate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ jobs:
exit 1
fi

- name: Run tests
- name: Run tests (CPU-only, no CUDA)
working-directory: candle-binding
run: cargo test --verbose
run: cargo test --no-default-features --verbose

- name: Check crate
- name: Check crate (CPU-only, no CUDA)
working-directory: candle-binding
run: cargo check --verbose
run: cargo check --no-default-features --verbose

- name: Build crate
- name: Build crate (CPU-only, no CUDA)
working-directory: candle-binding
run: cargo build --release --verbose
run: cargo build --release --no-default-features --verbose

- name: Dry run publish
working-directory: candle-binding
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/test-and-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ jobs:
- name: Check go mod tidy
run: make check-go-mod-tidy

- name: Build Rust library
run: make rust
- name: Build Rust library (CPU-only, no CUDA)
run: make rust-ci

- name: Install HuggingFace CLI
run: |
Expand All @@ -86,6 +86,7 @@ jobs:
- name: Run semantic router tests
run: make test
env:
CI: true
CGO_ENABLED: 1
LD_LIBRARY_PATH: ${{ github.workspace }}/candle-binding/target/release

Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ repos:
pass_filenames: false
- id: cargo-check
name: cargo check
entry: bash -c 'cd candle-binding && cargo check'
entry: bash -c 'cd candle-binding && cargo check --no-default-features'
language: system
files: \.rs$
pass_filenames: false
Expand Down
10 changes: 5 additions & 5 deletions Dockerfile.extproc
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,24 @@ COPY candle-binding/Cargo.loc[k] ./candle-binding/
COPY tools/make/ tools/make/
COPY Makefile ./

# Pre-build dependencies to cache them
# Pre-build dependencies to cache them (CPU-only, no CUDA)
RUN cd candle-binding && \
mkdir -p src && \
echo "fn main() {}" > src/lib.rs && \
cargo build --release && \
cargo build --release --no-default-features && \
rm -rf src

# Copy source code and build
COPY candle-binding/src/ ./candle-binding/src/

# Use Makefile to build the Rust library (rebuild with actual source code)
RUN echo "Building Rust library with actual source code..." && \
# Use Makefile to build the Rust library (rebuild with actual source code, CPU-only, no CUDA)
RUN echo "Building Rust library with actual source code (CPU-only, no CUDA)..." && \
echo "Checking source files:" && \
ls -la candle-binding/src/ && \
echo "Forcing clean rebuild..." && \
cd candle-binding && \
cargo clean && \
cargo build --release && \
cargo build --release --no-default-features && \
echo "Checking built library:" && \
find target -name "*.so" -type f && \
ls -la target/release/
Expand Down
20 changes: 10 additions & 10 deletions Dockerfile.extproc.cross
Original file line number Diff line number Diff line change
Expand Up @@ -72,29 +72,29 @@ COPY candle-binding/Cargo.loc[k] ./candle-binding/
COPY tools/make/ tools/make/
COPY Makefile ./

# Create a modified Makefile for cross-compilation
# Create a modified Makefile for cross-compilation (CPU-only, no CUDA)
RUN if [ "$TARGETARCH" = "arm64" ]; then \
echo "Modifying rust.mk for ARM64 cross-compilation..."; \
sed -i 's/cd candle-binding && cargo build --release/cd candle-binding \&\& cargo build --release --target aarch64-unknown-linux-gnu/' tools/make/rust.mk; \
echo "Modifying rust.mk for ARM64 cross-compilation (CPU-only, no CUDA)..."; \
sed -i 's/cd candle-binding && cargo build --release/cd candle-binding \&\& cargo build --release --no-default-features --target aarch64-unknown-linux-gnu/' tools/make/rust.mk; \
cat tools/make/rust.mk | grep "cargo build"; \
fi

# Pre-build dependencies to cache them
# Pre-build dependencies to cache them (CPU-only, no CUDA)
RUN cd candle-binding && \
mkdir -p src && \
echo "fn main() {}" > src/lib.rs && \
if [ "$TARGETARCH" = "arm64" ]; then \
cargo build --release --target aarch64-unknown-linux-gnu; \
cargo build --release --no-default-features --target aarch64-unknown-linux-gnu; \
else \
cargo build --release; \
cargo build --release --no-default-features; \
fi && \
rm -rf src

# Copy source code and build
COPY candle-binding/src/ ./candle-binding/src/

# Build with cross-compilation (rebuild with actual source code)
RUN echo "Building Rust library with actual source code..." && \
# Build with cross-compilation (rebuild with actual source code, CPU-only, no CUDA)
RUN echo "Building Rust library with actual source code (CPU-only, no CUDA)..." && \
echo "Current directory: $(pwd)" && \
echo "TARGETARCH: $TARGETARCH" && \
ls -la candle-binding/src/ && \
Expand All @@ -107,9 +107,9 @@ RUN echo "Building Rust library with actual source code..." && \
export CC_aarch64_unknown_linux_gnu=aarch64-linux-gnu-gcc; \
export CXX_aarch64_unknown_linux_gnu=aarch64-linux-gnu-g++; \
export AR_aarch64_unknown_linux_gnu=aarch64-linux-gnu-ar; \
cargo build --release --target aarch64-unknown-linux-gnu; \
cargo build --release --no-default-features --target aarch64-unknown-linux-gnu; \
else \
cargo build --release --target x86_64-unknown-linux-gnu; \
cargo build --release --no-default-features --target x86_64-unknown-linux-gnu; \
fi && \
echo "Checking built library..." && \
find target -name "*.so" -type f
Expand Down
52 changes: 52 additions & 0 deletions candle-binding/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions candle-binding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ license = "MIT OR Apache-2.0"
name = "candle_semantic_router"
crate-type = ["staticlib", "cdylib"]

[features]
default = ["cuda"]
cuda = ["candle-core/cuda", "candle-nn/cuda", "candle-transformers/cuda"]

[dependencies]
anyhow = { version = "1", features = ["backtrace"] }
candle-core = "0.8.4"
Expand Down
3 changes: 3 additions & 0 deletions config/config.development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ semantic_cache:
max_entries: 100
ttl_seconds: 600
eviction_policy: "fifo"
use_hnsw: true # Enable HNSW for faster search
hnsw_m: 16
hnsw_ef_construction: 200

tools:
enabled: false
Expand Down
58 changes: 58 additions & 0 deletions config/config.hybrid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Semantic router configuration that selects the hybrid semantic cache
# backend (in-memory HNSW index combined with Milvus).

bert_model:
  model_id: models/all-MiniLM-L12-v2
  threshold: 0.6
  use_cpu: true

semantic_cache:
  enabled: true
  backend_type: "hybrid"  # Hybrid HNSW + Milvus backend
  similarity_threshold: 0.85
  ttl_seconds: 3600

  # Hybrid cache specific settings
  max_memory_entries: 100000  # Max entries in HNSW index (100K)

  # HNSW parameters
  hnsw_m: 16  # Number of bi-directional links
  hnsw_ef_construction: 200  # Construction quality parameter

  # Milvus configuration file path
  backend_config_path: "config/milvus.yaml"

tools:
  enabled: true
  top_k: 3
  similarity_threshold: 0.2
  tools_db_path: "config/tools_db.json"
  fallback_to_empty: true

prompt_guard:
  enabled: true
  use_modernbert: true
  model_id: "models/jailbreak_classifier_modernbert-base_model"
  threshold: 0.7
  use_cpu: true
  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

# vLLM Endpoints Configuration
vllm_endpoints:
  - name: "endpoint1"
    address: "172.28.0.20"
    port: 8002
    weight: 1

# Per-model settings; "endpoint1" below is the endpoint name declared under
# vllm_endpoints in this file.
model_config:
  "qwen3":
    reasoning_family: "qwen3"
    preferred_endpoints: ["endpoint1"]
    pii_policy:
      allow_by_default: true

# Classifier configuration
classifier:
  enabled: true
  model_path: "models/qwen3-router_model/router_qwen_generative_model.safetensors"
  tokenizer_path: "models/qwen3-router_model"
  use_cpu: true
  threshold: 0.7

11 changes: 10 additions & 1 deletion config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@ bert_model:

semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory" or "milvus"
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
similarity_threshold: 0.8
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600
eviction_policy: "fifo"
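# HNSW index configuration (use_hnsw applies to the memory backend; hnsw_m and
# hnsw_ef_construction are also set for the hybrid backend, see config/config.hybrid.yaml)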
use_hnsw: true # Enable HNSW index for faster similarity search
hnsw_m: 16 # Number of bi-directional links (higher = better recall, more memory)
hnsw_ef_construction: 200 # Construction parameter (higher = better quality, slower build)

# Hybrid cache configuration (when backend_type: "hybrid")
# Combines in-memory HNSW for fast search with Milvus for scalable storage
# max_memory_entries: 100000 # Max entries in HNSW index (default: 100,000)
# backend_config_path: "config/milvus.yaml" # Path to Milvus config

tools:
enabled: true
Expand Down
2 changes: 1 addition & 1 deletion src/semantic-router/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
go.opentelemetry.io/otel/sdk v1.38.0
go.opentelemetry.io/otel/trace v1.38.0
go.uber.org/zap v1.27.0
golang.org/x/sys v0.37.0
google.golang.org/grpc v1.75.0
gopkg.in/yaml.v3 v3.0.1
k8s.io/apimachinery v0.31.4
Expand Down Expand Up @@ -93,7 +94,6 @@ require (
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/net v0.43.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
golang.org/x/tools v0.35.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
Expand Down
4 changes: 2 additions & 2 deletions src/semantic-router/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -426,8 +426,8 @@ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
Expand Down
Loading
Loading