Skip to content

Commit 748efa4

Browse files
committed
Ensure all binaries are copied
1 parent 0adb516 commit 748efa4

File tree

1 file changed

+14
-23
lines changed

1 file changed

+14
-23
lines changed

.github/workflows/build-cuda.yml

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ jobs:
71 71
- cuda_version: '12.8.1'
72 72
cuda_version_short: '12.8'
73 73
cuda_tag: '12.8.1-cudnn-devel-ubuntu22.04'
74-
architectures: '75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual'
74+
architectures: '75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual;120-virtual'
75 75
- cuda_version: '12.9.1'
76 76
cuda_version_short: '12.9'
77 77
cuda_tag: '12.9.1-cudnn-devel-ubuntu22.04'
@@ -163,28 +163,20 @@ jobs:
163 163
echo '=== Copying binaries ==='
164 164
cd /workspace
165 165
mkdir -p binaries/cuda-${{ matrix.cuda_version_short }}
166-
167-
# Copy only essential binaries
168-
cp llama.cpp/build/bin/llama-cli binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
169-
cp llama.cpp/build/bin/llama-server binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
170-
cp llama.cpp/build/bin/llama-bench binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
171-
cp llama.cpp/build/bin/llama-quantize binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
172-
cp llama.cpp/build/bin/llama-embedding binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
173-
174-
# Copy essential libraries
175-
cp llama.cpp/build/ggml/src/libggml*.so* binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
176-
cp llama.cpp/build/src/libllama.so binaries/cuda-${{ matrix.cuda_version_short }}/ 2>/dev/null || true
177-
178-
# Strip binaries to reduce size
179-
strip binaries/cuda-${{ matrix.cuda_version_short }}/* 2>/dev/null || true
180-
166+
167+
# Copy everything from build/bin
168+
cp -r llama.cpp/build/bin/* binaries/cuda-${{ matrix.cuda_version_short }}/
169+
170+
# Strip binaries to reduce size (executables only, not .so files)
171+
find binaries/cuda-${{ matrix.cuda_version_short }}/ -type f -executable ! -name "*.so*" -exec strip {} \; 2>/dev/null || true
172+
181 173
echo '=== Creating version info ==='
182 174
echo 'llama.cpp version: ${{ needs.check-release.outputs.release_tag }}' > binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
183 175
echo 'CUDA version: ${{ matrix.cuda_version }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
184 176
echo 'Architectures: ${{ matrix.architectures }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
185-
echo 'Build date: '\$(date -u +%Y-%m-%d) >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
177+
echo 'Build date: '$(date -u +%Y-%m-%d) >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
186 178
echo 'Build hash: ${{ needs.check-release.outputs.release_hash }}' >> binaries/cuda-${{ matrix.cuda_version_short }}/VERSION.txt
187-
179+
188 180
echo '=== Build complete ==='
189 181
ls -lh binaries/cuda-${{ matrix.cuda_version_short }}/
190 182
@@ -203,9 +195,8 @@ jobs:
203 195
204 196
- name: Create tarball
205 197
run: |
206-
cd binaries/cuda-${{ matrix.cuda_version_short }}
207-
tar -czf ../llama.cpp-${{ needs.check-release.outputs.release_tag }}-cuda-${{ matrix.cuda_version_short }}.tar.gz .
208-
cd ..
198+
cd binaries
199+
tar -czf llama.cpp-${{ needs.check-release.outputs.release_tag }}-cuda-${{ matrix.cuda_version_short }}.tar.gz cuda-${{ matrix.cuda_version_short }}
209 200
ls -lh *.tar.gz
210 201
211 202
- name: Upload artifact
@@ -251,7 +242,7 @@ jobs:
251 242
## CUDA Versions
252 243
- CUDA 12.4 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0
253 244
- CUDA 12.6 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0
254-
- CUDA 12.8 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0
245+
- CUDA 12.8 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
255 246
- CUDA 12.9 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
256 247
- CUDA 13.0 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
257 248
@@ -261,7 +252,7 @@ jobs:
261 252
- 8.6: RTX 3000 series
262 253
- 8.9: RTX 4000 series, L4, L40
263 254
- 9.0: H100, H200
264-
- 10.0: B100, B200, GB200 (Blackwell)
255+
- 10.0: B200
265 256
- 12.0: RTX Pro series, RTX 50xx
266 257
267 258
## Usage

0 commit comments

Comments
 (0)