7171 - cuda_version : ' 12.8.1'
7272 cuda_version_short : ' 12.8'
7373 cuda_tag : ' 12.8.1-cudnn-devel-ubuntu22.04'
74- architectures : ' 75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual'
74+ architectures : ' 75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual;120-virtual '
7575 - cuda_version : ' 12.9.1'
7676 cuda_version_short : ' 12.9'
7777 cuda_tag : ' 12.9.1-cudnn-devel-ubuntu22.04'
@@ -163,28 +163,20 @@ jobs:
163163 echo '=== Copying binaries ==='
164164 cd /workspace
165165 mkdir -p binaries/cuda-${{ matrix.cuda_version_short }}
166-
167- # Copy only essential binaries
168- cp llama.cpp/build/bin/llama-cli binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
169- cp llama.cpp/build/bin/llama-server binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
170- cp llama.cpp/build/bin/llama-bench binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
171- cp llama.cpp/build/bin/llama-quantize binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
172- cp llama.cpp/build/bin/llama-embedding binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
173-
174- # Copy essential libraries
175- cp llama.cpp/build/ggml/src/libggml*.so* binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
176- cp llama.cpp/build/src/libllama.so binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
177-
178- # Strip binaries to reduce size
179- strip binaries/cuda-${{ matrix.cuda_version_short }}/* 2>/dev/null || true
180-
166+
167+ # Copy everything from build/bin
168+ cp -r llama.cpp/build/bin/* binaries/cuda-${{ matrix.cuda_version_short }}/
169+
170+ # Strip binaries to reduce size (executables only, not .so files)
171+ find binaries/cuda-${{ matrix.cuda_version_short }}/ -type f -executable ! -name "*.so*" -exec strip {} \; 2>/dev/null || true
172+
181173 echo '=== Creating version info ==='
182174 echo 'llama.cpp version: ${{ needs.check-release.outputs.release_tag }}' > binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
183175 echo 'CUDA version: ${{ matrix.cuda_version }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
184176 echo 'Architectures: ${{ matrix.architectures }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
185- echo 'Build date: '\ $(date -u +%Y-%m-%d) >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
177+ echo 'Build date: '$(date -u +%Y-%m-%d) >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
186178 echo 'Build hash: ${{ needs.check-release.outputs.release_hash }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
187-
179+
188180 echo '=== Build complete ==='
189181 ls -lh binaries/cuda-${{ matrix.cuda_version_short }}/
190182
@@ -203,9 +195,8 @@ jobs:
203195
204196 - name : Create tarball
205197 run : |
206- cd binaries/cuda-${{ matrix.cuda_version_short }}
207- tar -czf ../llama.cpp-${{ needs.check-release.outputs.release_tag }}-cuda-${{ matrix.cuda_version_short }}.tar.gz .
208- cd ..
198+ cd binaries
199+ tar -czf llama.cpp-${{ needs.check-release.outputs.release_tag }}-cuda-${{ matrix.cuda_version_short }}.tar.gz cuda-${{ matrix.cuda_version_short }}
209200 ls -lh *.tar.gz
210201
211202 - name : Upload artifact
@@ -251,7 +242,7 @@ jobs:
251242 ## CUDA Versions
252243 - CUDA 12.4 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0
253244 - CUDA 12.6 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0
254- - CUDA 12.8 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0
245+ - CUDA 12.8 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
255246 - CUDA 12.9 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
256247 - CUDA 13.0 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
257248
@@ -261,7 +252,7 @@ jobs:
261252 - 8.6: RTX 3000 series
262253 - 8.9: RTX 4000 series, L4, L40
263254 - 9.0: H100, H200
264- - 10.0: B100, B200, GB200 (Blackwell)
255+ - 10.0: B200
265256 - 12.0: RTX Pro series, RTX 50xx
266257
267258 ## Usage
0 commit comments