Skip to content

Commit

Permalink
Adjust performance settings for Tesla V100 GPUs.
Browse files Browse the repository at this point in the history
  • Loading branch information
kshitizbakshi-adobe committed Apr 15, 2019
1 parent 286b0fe commit ca4c373
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 7 deletions.
3 changes: 3 additions & 0 deletions install-nvidia-coreos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ echo "NVIDIA Drivers built! Trying to install now...\n";
# Run the driver install step, forwarding this script's first three positional arguments.
/bin/bash nvidia_install.sh $1 $2 $3
echo "NVIDIA Drivers installed!\n"

# Copy the performance-tuning script into /opt/bin, the path that
# nvidia-configure-settings.service executes, then queue that service
# without waiting for it to finish (--no-block).
cp nvidia-configure-settings.sh /opt/bin
systemctl start --no-block nvidia-configure-settings.service

# Return to the directory held in org_dir (presumably saved earlier in this script; not visible in this hunk).
cd $org_dir
12 changes: 12 additions & 0 deletions nvidia-configure-settings.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Runs /opt/bin/nvidia-configure-settings.sh to apply GPU performance settings.
[Unit]
Description=NVIDIA Settings Configuration for performance
Wants=local-fs.target
# Disable start rate limiting so the restart loop configured below is never cut off.
StartLimitIntervalSec=0

[Service]
Type=simple
StandardOutput=journal+console
StandardError=journal+console
# The script exits with a failure status while persistence mode is not yet
# enabled; retry once per second until it succeeds.
Restart=on-failure
RestartSec=1
ExecStart=/bin/bash /opt/bin/nvidia-configure-settings.sh
21 changes: 21 additions & 0 deletions nvidia-configure-settings.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/sh
#
# Apply performance settings to the NVIDIA GPU through nvidia-smi.
#
# Exits non-zero when persistence mode is not enabled, or when an nvidia-smi
# settings command fails, so that a supervisor (for example a systemd unit
# with Restart=on-failure) can retry. Only the first GPU reported by
# nvidia-smi is inspected.
set -e

NVIDIA_SMI=/opt/bin/nvidia-smi

# With --format=csv, line 1 is the column header and line 2 is the first GPU.
PERSISTENCE_MODE=$("$NVIDIA_SMI" --query-gpu=persistence_mode --format=csv | sed -n 2p | awk '{print $1}')
if [ "$PERSISTENCE_MODE" != "Enabled" ]; then
  echo "Setting performance settings requires persistence mode to be enabled. Exiting..." >&2
  # Exit statuses must lie in 0-255; "exit -1" is rejected by strict POSIX
  # shells such as dash, so use a plain failure status instead.
  exit 1
fi

# The reported name is expected to look like "Tesla V100-SXM2-16GB"; the
# second whitespace-separated word is the chip model.
GPU_CHIP_NAME=$("$NVIDIA_SMI" --query-gpu=gpu_name --format=csv | sed -n 2p | awk '{print $2}')
if [ "$GPU_CHIP_NAME" = "V100-SXM2-16GB" ]; then
  echo "Detected a Tesla V100-SXM2-16GB chip. Setting clocks as suggested by AWS Documentation for P3 instances."
  # -ac takes application clocks as <memory MHz>,<graphics MHz>.
  "$NVIDIA_SMI" -ac 877,1530
else
  # Not a Tesla V100 GPU. Try turning off auto-boost at least, given we don't know max clocks...
  "$NVIDIA_SMI" --auto-boost-default=0
fi

echo "NVIDIA performance configuration complete."
10 changes: 4 additions & 6 deletions nvidia-persistenced.service
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
# Keeps the NVIDIA persistence daemon running.
[Unit]
Description=NVIDIA Persistence Daemon
Wants=local-fs.target
# Order this unit after the one that loads the NVIDIA kernel module.
After=nvidia-start.service

[Service]
Type=forking
StandardOutput=journal+console
StandardError=journal+console
PIDFile=/var/run/nvidia-persistenced/nvidia-persistenced.pid
Restart=always
RestartSec=2
ExecStart=/opt/bin/nvidia-persistenced --user nvidia-persistenced --persistence-mode --verbose
# Remove the runtime directory (which holds the PID file) once the daemon has stopped.
ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced

[Install]
WantedBy=multi-user.target
# NOTE(review): this view merges removed and added diff lines. The ExecStopPost=
# below duplicates the one in [Service] and sits under [Install], where it is
# presumably not honored -- confirm which copy belongs to the current file.
ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced
3 changes: 3 additions & 0 deletions nvidia-start.service
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ Description=Load NVIDIA module
After=local-fs.target

[Service]
# oneshot: the unit counts as started only after nvidia-start.sh has exited,
# so units ordered After= this one wait for the script to finish.
Type=oneshot
# Send the script's output to both the journal and the system console.
StandardOutput=journal+console
StandardError=journal+console
ExecStart=/opt/bin/nvidia-start.sh

[Install]
Expand Down
1 change: 0 additions & 1 deletion nvidia_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,4 @@ systemctl daemon-reload
# Register the module-loading unit for boot and start it now.
systemctl enable nvidia-start.service
systemctl start nvidia-start.service

# NOTE(review): the commit header reports one deleted line in this hunk, but
# this view does not mark which of the lines here was removed -- confirm
# against the current file.
systemctl enable nvidia-persistenced.service
systemctl start nvidia-persistenced.service

0 comments on commit ca4c373

Please sign in to comment.