Added an updated version of original ProDiags to tools

weka · Aug 23, 2022 · d1b8625 · d1b8625
1 parent c0b16a0
commit d1b8625
Show file tree

Hide file tree

Showing 20 changed files with 1,634 additions and 0 deletions.
diff --git a/prodiags/CHANGELOG b/prodiags/CHANGELOG
@@ -0,0 +1,71 @@
+Version 0.1 10/12/2020
+Initial upload
+
+Version 0.1 10/25/2020
+Converted a few scripts to run under the Python environment
+Added -e option to display errors only
+
+Version 0.2 11/01/2020
+Added an update option (-u command line option); the VERSION file is used for the update
+Added several new tests in testbank
+
+Version 0.3 11/03/2020
+Added support for version 3.9.0 for performing SSD test as weka output is
+changed for weka cluster drive and weka cluster host commands
+
+Version 0.4 11/07/2020
+Added beautifier for output to text and file of reported logs
+Added freespace test for the Weka mounted partition to calculate the space
+required for Weka installation
+
+Version 0.5 11/09/2020
+Added Free boot device test, Physical installed RAM allocation test
+Added setup.sh to install python packages as needed
+
+Version 0.6 11/14/2020
+Fixed output and errors in some tests with Jacky feedback
+Added hostname and IP addresses return in tests
+Minor typos and fixes
+
+Version 0.7 11/19/2020
+Added ECC RAM test, added logs collection automatically to compatible S3
+bucket. Small fixes and addons
+
+Version 0.8 11/30/2020
+Added ECC CPU, and general critical BMC errors test, minor bugs and fixes
+Beautified test outputs according to David H.'s request
+
+Version 0.9 12/4/2020
+Fixes in IPMI scripts to support ipmiutil download from 3rd party location for CentOS 8
+Fixed memtest, added support for weka 3.10.x-beta. Small bug fixes
+
+Version 1.0 12/6/2020
+Added support for versions 3.9.3, 3.9.2, 3.9.1. Cosmetics on output, internet
+connection verification, root user verification. Version output. Support for
+#dont_run parameter in test, so test is excluded. Removed white spaces on
+output of tests to look cleaner. If no -e specified everything is spit on the
+screen.
+
+Version 1.0 12/17/2020
+Fixed log file closing when running with verbosity (minor fix)
+
+Version 1.1 12/23/2020
+Added an option for user to agree to upload the logs for analysis.
+Added a standalone version without the .py extension, to avoid installing extra
+Python libraries. For ipmi tests, added a locally bundled ipmiutil package for RH /
+CentOS Linux distros. MC (the minio client) is not added, as we assume there is no
+internet available
+
+Version 1.2 1/19/2021
+Fixed ssdtest to allow proper NVMe spare defaults handling. Fixed support for
+both IPMItool and IPMIutil packages specifically for Hitachi case. Changed
+default logs upload behaviour to NO
+
+Version 1.3 8/15/2021
+Added support for Weka version 3.12 - updated scripts, binary and tests
+
+Version 1.4 1/21/2022
+Added support for Weka version 3.13 and 3.14 - updated scripts, binary and tests
+
+8/8/2022
+Removed connection to non weka domains, might break functionality of ipmi tests, if some packages are missing from the install
diff --git a/prodiags/README b/prodiags/README
@@ -0,0 +1,57 @@
+WekaIO_ProDiags tool version 1.4
+Written by Daniel Slabodar (October 2020) bugz to [email protected], [email protected] or [email protected]
+
+The tool is written in Python to support multithreaded and multinode runtime of the various hardware diagnostic tests located in the testbank directory for Weka.IO
+==============================================================================
+usage: wekaIO_ProDiags.py or for standalone version ./wekaIO_ProDiags [-h] [-v] [-u] [-l] [-r N [N ...]] [-ra] [-e] [-nj]
+                          [-no] [-f PATH]
+optional arguments:
+    -h, --help            show this help message and exit
+    -v, --version         WekaIO_ProDiags version
+    -u, --update          Software update (update will not work for now)
+    -l, --list            Show all available tests
+    -r N [N ...], --run N [N ...]
+                          Run specified tests
+    -ra, --runall         Run all available tests
+    -e, --errors_only     Show failed tests only
+    -nj, --nojson         no JSON report
+    -no, --nooutput       no scripts output
+    -f PATH, --file PATH  Output file (default: standard output)
+
+Example: ./wekaIO_ProDiags.py -ra -e -nj (would run all tests in testbank displaying errors only and TEXT output only format)
+         ./wekaIO_ProDiags.py -l (would display list of available tests in testbank directory)
+	     ./wekaIO_ProDiags.py -r 3 2 1 -e -no (would run tests number 3,2 and 1 from testbank displaying errors only and JSON output only format)
+	     ./wekaIO_ProDiags.py -u (would test if there are any new updates - URL configured in config.py section)
+
+Installation & Runtime:
+	 Run as sudo user:
+	 mkdir WekaIO_ProDiags
+	 cd WekaIO_ProDiags
+	 wget http://xxx/WekaIO_ProDiags/latest.tar
+	 tar xvf latest.tar
+	 ./wekaIO_ProDiags.py or ./wekaIO_ProDiags (for standalone version) currently, standalone version 1.3 doesn't work in Python libraries with version below 3.7, for supporting binaries please take version 1.2
+
+Please use setup.sh to install the required python libraries!
+
+Adding #run_once in test header would run that specific test only once
+Adding #dont_run in test would exclude the test from running
+
+Required Python 3.x.x libraries (when running the standalone version, only python3 itself is required):
+====================================
+threading Thread
+pathlib
+time sleep
+random randint
+os
+sys
+argparse
+scp SCPClient
+paramiko SSHClient,AutoAddPolicy
+json
+config
+traceback
+requests
+io
+tarfile
+====================================
+
diff --git a/prodiags/VERSION b/prodiags/VERSION
@@ -0,0 +1 @@
+1.4
diff --git a/prodiags/collect_diags.sh b/prodiags/collect_diags.sh
@@ -0,0 +1,124 @@
+#!/bin/bash
+# Tool to collect diagnostics and upload them to dedicated S3 compatible cloud bucket
+
+# Default settings
+url="http://xxx:9000"
+name="myminio"
+bucketname="wekaioprodiags"
+akey="YODAINSPACE"
+skey="SPACEISINYODA"
+
+now=$(date +"%m-%d-%Y_%H-%M-%S")
+cluster_name=""
+log_fname="/var/log/WekaIO_ProDiags.log"
+
+internet_host="lib.ru"
+
+# Check the internet connection by sending a single ping (1 second timeout)
+# to $internet_host. Returns quietly when the host answers; otherwise prints
+# a notice and terminates the whole script with status 1.
+function check_outer_space()
+{
+  if ping -c 1 -W 1 "$internet_host" > /dev/null 2>&1; then
+    return
+  fi
+  echo "Internet connection is unavailable, not collecting logs"
+  exit 1
+}
+
+# Ask the operator for permission to upload the logs.
+# Reads one line from stdin.
+# Returns: 0 when the answer is yes/Yes/YES/y/Y.
+# Exits:   the whole script with status 0 (after printing a cancellation
+#          notice) on a negative answer, any other input, empty input or EOF.
+function ask_perm()
+{
+  # Keep the answer local so no scratch variable leaks into the script.
+  local answer=""
+  printf '%s' "We are going to upload the log to a centralized logging server for Weka support personel to look at it, do you agree? (yes/no/(Default: No)): "
+  # -r: take the typed line literally (no backslash processing).
+  read -r answer
+  case "$answer" in
+    yes|Yes|YES|y|Y)
+      echo "Thank you, uploading the logs"
+      return 0
+      ;;
+    no|No|NO|n|N)
+      echo "Thank you, log upload cancelled"
+      exit 0
+      ;;
+    *)
+      echo "Default selected, log upload cancelled"
+      exit 0
+      ;;
+  esac
+}
+
+
+# Fetch the MinIO client binary ("mc") into the current directory.
+# Removes a previous ./mc and any ~/.mc* leftovers first, downloads the
+# binary with curl and marks it executable.
+# Exits the whole script with status 1 when curl is not available or the
+# download does not produce a readable ./mc.
+function get_minio()
+{
+  rm -rf mc
+  # "~/root/.mc*" expanded to $HOME/root/.mc* and never matched anything;
+  # the leftovers (presumably the client's configuration) live in $HOME itself.
+  rm -rf -- ~/.mc*
+  if ! command -v curl > /dev/null 2>&1; then
+    exit 1
+  fi
+  # -f: fail on HTTP errors so an error page is never saved as the "binary".
+  if ! curl -fs http://xxx/WekaIO_ProDiags/lib/mc -o mc || [ ! -r mc ]; then
+    rm -f mc
+    exit 1
+  fi
+  chmod +x mc
+}
+
+function set_alias()
+{
+./mc alias set $name $url $akey $skey 1> /dev/null 2> /dev/null
+}
+
+# Record `weka status` in /tmp/weka_status_<timestamp>.txt and store the
+# cluster name (second field of the first line containing "cluster:") in
+# the global $cluster_name.
+# When /usr/bin/weka is not executable nothing is written, $cluster_name is
+# left untouched and the function returns non-zero.
+function get_weka_cluster_info()
+{
+  local status_file="/tmp/weka_status_${now}.txt"
+  [ -x /usr/bin/weka ] || return
+  weka status > "$status_file"
+  # Stop at the first match: the name ends up inside file names, so it must
+  # stay a single line even if "cluster:" shows up more than once.
+  cluster_name=$(awk '/cluster:/ { print $2; exit }' "$status_file")
+}
+
+# Snapshot the VERSION file from the current working directory into
+# /tmp/VERSION_<timestamp>.txt so it can be packaged with the logs.
+function get_tools_version()
+{
+  local snapshot="/tmp/VERSION_${now}.txt"
+  cat VERSION > "$snapshot"
+}
+
+# Package the local runtime log for upload.
+# Copies $log_fname to /tmp/WekaIO_ProDiags_<cluster>_<timestamp>.log and
+# archives it, together with the VERSION and weka status snapshots, into
+# /tmp/WekaIO_ProDiags_<cluster>_<timestamp>.tgz.
+# Exits the whole script with status 1 (after calling clean) when the log
+# file is not readable.
+function get_logs()
+{
+  local stem="/tmp/WekaIO_ProDiags_${cluster_name}_${now}"
+
+  if [ ! -r "$log_fname" ]; then
+    clean
+    exit 1
+  fi
+
+  # Copy straight to the final name: no dependency on the basename of
+  # $log_fname and no intermediate /tmp/WekaIO_ProDiags.log to clobber.
+  cp -- "$log_fname" "${stem}.log"
+  # stderr is dropped on purpose: tar complains about absolute member names
+  # and about the weka status snapshot, which does not exist when the weka
+  # CLI is absent.
+  tar cfz "${stem}.tgz" "${stem}.log" "/tmp/VERSION_${now}.txt" "/tmp/weka_status_${now}.txt" 2> /dev/null
+}
+
+# Upload the packaged logs to the remote S3 bucket.
+# First asks the client to create the bucket (all output discarded, result
+# not checked), then copies /tmp/WekaIO_ProDiags_<cluster>_<timestamp>.tgz
+# into it. Returns the status of the copy.
+function upload_logs()
+{
+  local target="${name}/${bucketname}"
+  local archive="/tmp/WekaIO_ProDiags_${cluster_name}_${now}.tgz"
+
+  ./mc mb "$target" > /dev/null 2>&1
+  ./mc cp "$archive" "$target" > /dev/null
+}
+
+# Clean up everything this run left behind: the weka status and VERSION
+# snapshots, the copied log and its archive in /tmp, the downloaded ./mc
+# binary and any ~/.mc* leftovers.
+function clean()
+{
+  local stem="/tmp/WekaIO_ProDiags_${cluster_name}_${now}"
+
+  rm -rf -- "/tmp/weka_status_${now}.txt" "/tmp/VERSION_${now}.txt"
+  # get_logs names its files with the cluster name and .log/.tgz suffixes;
+  # the former WekaIO_ProDiags_<timestamp>.txt pattern never matched them.
+  rm -rf -- "${stem}.log" "${stem}.tgz"
+  rm -rf mc
+  # "~/root/.mc*" expanded to $HOME/root/.mc* and never matched anything.
+  rm -rf -- ~/.mc*
+}
+
+# MAIN
+check_outer_space
+ask_perm
+get_minio
+set_alias
+get_weka_cluster_info
+get_tools_version
+get_logs
+upload_logs
+clean
diff --git a/prodiags/config.py b/prodiags/config.py
@@ -0,0 +1,6 @@
+USERNAME = 'root'
+PASSWORD = ''
+SSH_AUTH_TIMEOUT = 30
+SSH_CONNECT_TIMEOUT = 30
+SSH_EXEC_TIMEOUT = 30
+TAR_URL = "http://xxx/WekaIO_ProDiags/latest.tar"
diff --git a/prodiags/lib/ipmiutil-3.1.6-1.1.x86_64.rpm b/prodiags/lib/ipmiutil-3.1.6-1.1.x86_64.rpm
diff --git a/prodiags/setup.sh b/prodiags/setup.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Script to install python3 required packages
+
+# Globals
+res=""
+
+which pip3 1> /dev/null 2> /dev/null
+if [ $? -eq 1 ]; then
+	echo "Pip3 is required as part of Python 3.x.x installation to run this tool"
+	exit 1
+fi
+
+pip3 install scp requests pathlib paramiko 1> /dev/null 2> /dev/null
+if [ $? -eq 1 ]; then
+	pip3 install scp requests pathlib paramiko
+	echo "Failed to install some of the components"
+	exit 1
+else
+	echo "$0 completed successfully!"
+fi
diff --git a/prodiags/testbank/alocram/alocram.py b/prodiags/testbank/alocram/alocram.py
@@ -0,0 +1,56 @@
+#!/bin/bash
+#run_once
+
+# Globals
+# Test verdict shared by the functions below: "0" = passed, "1" = failed.
+res=0
+
+# Print the separator line that opens the test report.
+function barline () {
+  printf '%s\n' "================================================================="
+}
+
+function testname () {
+## testname
+echo "Test name: Testing Weka allocated RAM that is equal between hosts"
+which hostname 1> /dev/null 2> /dev/null
+if [ $? -eq 1 ]; then
+	echo "Hostname command not found"
+else
+	echo "Hostname: `hostname`"
+	echo "IP address: `hostname -I`"
+fi
+}
+
+# Test run: print the report header, then take the second field of every
+# "memory" line in `weka cluster host -b -J`, strip commas and count the
+# runs of identical adjacent values. Anything other than exactly one run
+# prints a hint and sets the global verdict $res to "1".
+function testrun () {
+  barline
+  testname
+
+  local distinct
+  distinct=$(weka cluster host -b -J | grep -i memory | awk '{ print $2 }' | tr -d ',' | uniq | wc -l)
+  if [ "$distinct" = "1" ]; then
+    return
+  fi
+  echo "At least one of the hosts has wrong RAM allocated, please check with weka cluster host -b -J command"
+  res="1"
+}
+
+# MAIN
+# With any argument: run the test with its output going straight to stdout.
+# Without an argument: capture the output and print it only when the test
+# failed. Exit status is 1 on failure and 0 otherwise.
+
+if [ "$1" ]; then
+  testrun
+  if [ "$res" -eq 1 ]; then
+    exit 1
+  fi
+else
+  # Private temporary file instead of the fixed /tmp/<script>.log name,
+  # which another user could pre-create or turn into a symlink.
+  log_file=$(mktemp "/tmp/$(basename "$0").XXXXXX") || exit 1
+  # Removed on every exit path.
+  trap 'rm -f -- "$log_file"' EXIT
+  # Redirect instead of $(...): testrun has to run in this shell so that
+  # its update of $res is visible below.
+  testrun > "$log_file"
+  if [ "$res" -ne 0 ]; then
+    cat "$log_file"
+    exit 1
+  fi
+  exit 0
+fi
diff --git a/prodiags/testbank/freeram/freeram.py b/prodiags/testbank/freeram/freeram.py
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Globals
+# Test verdict shared by the functions below: "0" = passed, "1" = failed.
+res=0
+
+# Print the separator line that opens the test report.
+function barline () {
+  printf '%s\n' "================================================================="
+}
+
+function testname () {
+## testname
+echo "Test name: Free RAM test"
+which hostname 1> /dev/null 2> /dev/null
+if [ $? -eq 1 ]; then
+	echo "Hostname command not found"
+else
+	echo "Hostname: `hostname`"
+	echo "IP address: `hostname -I`"
+fi
+}
+
+# Test run: print the report header followed by the output of `free -lh`.
+# Sets the global verdict $res to "1" when free fails.
+function testrun () {
+  barline
+  testname
+
+  # Any non-zero status is a failure -- e.g. 127 when free is not
+  # installed -- not just status 1.
+  if ! free -lh; then
+    res="1"
+  fi
+}
+
+# MAIN
+# With any argument: run the test with its output going straight to stdout.
+# Without an argument: capture the output and print it only when the test
+# failed. Exit status is 1 on failure and 0 otherwise.
+
+if [ "$1" ]; then
+  testrun
+  if [ "$res" -eq 1 ]; then
+    exit 1
+  fi
+else
+  # Private temporary file instead of the fixed /tmp/<script>.log name,
+  # which another user could pre-create or turn into a symlink.
+  log_file=$(mktemp "/tmp/$(basename "$0").XXXXXX") || exit 1
+  # Removed on every exit path.
+  trap 'rm -f -- "$log_file"' EXIT
+  # Redirect instead of $(...): testrun has to run in this shell so that
+  # its update of $res is visible below.
+  testrun > "$log_file"
+  if [ "$res" -ne 0 ]; then
+    cat "$log_file"
+    exit 1
+  fi
+  exit 0
+fi