|
| 1 | +#!/bin/bash |
#######################################
# Print the help text: program summary, link to the manual, synopsis, the
# list of option flags, and version/author information.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # One argument per output line; empty arguments produce the blank lines.
  printf '%s\n' \
    '' \
    'This program provides several scripts to perform common tasks on sequences in standard FASTQ format' \
    '' \
    'For user manual please go to: https://github.com/raymondkiu/fqtk' \
    '' \
    'Usage: fqtk [options] FASTQ' \
    '' \
    'Options:' \
    ' -a Rename FASTQ identifier for FASTQ reads. Optionally with /1 or /2. Usage: ./fqtk -a R1.FASTQ NEWIDENTIFIER /1' \
    ' -q Print FASTQ stats' \
    ' -t Convert FASTQ to FASTA. Usage: ./fqtk -t FASTQ > NEWFILENAME' \
    ' -r Print FASTQ stats in tabular format' \
    ' -h Print usage and exit' \
    ' -v Print version and exit' \
    '' \
    'Version 1.0 (2022)' \
    'Author: Raymond Kiu [email protected]' \
    ''
}
| 23 | + |
#######################################
# Print the program name and version number.
# Arguments: none
# Outputs:   a single line on stdout
#######################################
version() {
  printf '%s\n' 'fqtk v1.0'
}
| 27 | + |
| 28 | + |
# Positional command-line arguments shared by the functions below.
# $1 is expected to be the option flag, so the values start at $2
# (see the usage text: ./fqtk -a R1.FASTQ NEWIDENTIFIER /1).
FILE="${2}"      # FASTQ file to operate on
name="${3}"      # new identifier placed after "@" by rename1
sequence="${4}"  # text appended after the record number by rename1 (e.g. /1)
| 32 | + |
#######################################
# Rewrite every FASTQ header line (lines 1, 5, 9, ...) as
# "@<name><n><sequence>", where <n> is the 1-based record number.
# All other lines are passed through unchanged.
# Globals:   FILE (read), name (read), sequence (read)
# Arguments: none
# Outputs:   renamed FASTQ on stdout; error message on stderr
# Exits:     1 when FILE does not exist
#######################################
rename1() {
  if [[ ! -e "$FILE" ]]; then
    # stderr, so the message never ends up inside a redirected output file.
    echo "$FILE file does not seem to exist. Program will now exit." >&2
    exit 1
  fi

  # Pass the shell values with -v instead of splicing them into the awk
  # program text: identifiers containing spaces or quotes would otherwise
  # break (or inject code into) the awk program.
  # Reading via redirection keeps awk from interpreting an unusual file name
  # as an option or as a var=value assignment.
  awk -v prefix="$name" -v suffix="$sequence" '
    NR % 4 == 1 { n++; print "@" prefix n suffix; next }
    { print }
  ' < "$FILE"
}
| 43 | + |
| 44 | + |
#######################################
# Print summary statistics for a FASTQ file, one "Label: value" per line:
# file name, on-disk size, total bases, number of reads, and the maximum,
# minimum and mean read length.
# Globals:   FILE (read)
# Arguments: none
# Outputs:   statistics on stdout; error message on stderr
# Exits:     0 after printing; 1 when FILE does not exist
#######################################
fastqstats() {
  if [[ ! -e "$FILE" ]]; then
    echo "$FILE file does not seem to exist. Program will now exit." >&2
    exit 1
  fi

  local file_size total_bases read_count max_len min_len mean_len
  file_size=$(du -sh -- "$FILE" | awk '{print $1}')

  # Single pass over the file. Reads are counted from the sequence lines
  # (line 2 of every 4-line record) rather than from `wc -l`, which
  # under-counts when the last line has no trailing newline. The mean is
  # guarded so an empty file reports 0 instead of a division-by-zero error.
  read -r total_bases read_count max_len min_len mean_len < <(
    awk '
      NR % 4 == 2 {
        len = length($0)
        total += len
        if (reads == 0 || len > max) max = len
        if (reads == 0 || len < min) min = len
        reads++
      }
      END {
        mean = (reads > 0) ? total / reads : 0
        print total + 0, reads + 0, max + 0, min + 0, mean
      }
    ' < "$FILE"
  )

  echo "Sample: $FILE"
  echo "File size: $file_size"
  echo "Total bases: $total_bases"
  echo "Reads: $read_count"
  echo "Max read length: $max_len"
  echo "Min read length: $min_len"
  echo "Mean read length: $mean_len"
  exit 0
}
#######################################
# Convert FASTQ to FASTA: for each 4-line record, print the header line with
# its leading "@" replaced by ">" followed by the sequence line. A header
# line that does not start with "@" is not printed.
# Globals:   FILE (read)
# Arguments: none
# Outputs:   FASTA on stdout; error message on stderr
# Exits:     1 when FILE does not exist
#######################################
fastq2fasta() {
  if [[ ! -e "$FILE" ]]; then
    # stderr, so the message never ends up inside the redirected FASTA file.
    echo "$FILE file does not seem to exist. Program will now exit." >&2
    exit 1
  fi

  # awk instead of sed's `first~step` addresses, which are a GNU extension
  # and are rejected by BSD/macOS sed.
  awk '
    NR % 4 == 1 { if (sub(/^@/, ">")) print; next }
    NR % 4 == 2 { print }
  ' < "$FILE"
}
| 78 | + |
#######################################
# Print FASTQ summary statistics as a two-row, tab-separated table: a header
# row followed by one data row with file name, on-disk size, total bases,
# number of reads, and maximum, minimum and mean read length.
# Globals:   FILE (read)
# Arguments: none
# Outputs:   table on stdout; error message on stderr
# Exits:     0 after printing; 1 when FILE does not exist
#######################################
fastqtabular() {
  if [[ ! -e "$FILE" ]]; then
    echo "$FILE file does not seem to exist. Program will now exit." >&2
    exit 1
  fi

  local file_size total_bases read_count max_len min_len mean_len
  file_size=$(du -sh -- "$FILE" | awk '{print $1}')

  # Single pass over the file. Reads are counted from the sequence lines
  # (line 2 of every 4-line record) rather than from `wc -l`, which
  # under-counts when the last line has no trailing newline. The mean is
  # guarded so an empty file reports 0 instead of a division-by-zero error.
  read -r total_bases read_count max_len min_len mean_len < <(
    awk '
      NR % 4 == 2 {
        len = length($0)
        total += len
        if (reads == 0 || len > max) max = len
        if (reads == 0 || len < min) min = len
        reads++
      }
      END {
        mean = (reads > 0) ? total / reads : 0
        print total + 0, reads + 0, max + 0, min + 0, mean
      }
    ' < "$FILE"
  )

  # Header and data rows both have exactly seven tab-separated fields (no
  # trailing tab). printf '%s' keeps backslashes in the file name literal.
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "SampleID" "Size" "Total_bases" "Reads" "MaxRL" "MinRL" "MeanRL"
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$FILE" "$file_size" "$total_bases" "$read_count" \
    "$max_len" "$min_len" "$mean_len"
  exit 0
}
| 111 | + |
# Entry point: require at least one command-line argument, then dispatch on
# the first option flag. Every option arm exits the script itself.
if (( $# < 1 )); then
  # Usage errors go to stderr, like the option errors reported below.
  {
    echo "Missing optional argument or positional argument, please supply your fastq reads"
    echo ""
    echo "Options: ./fqtk -h"
    echo ""
    echo ""
  } >&2
  exit 1
fi

# Leading ':' selects silent error reporting, so invalid options arrive as
# '?' with the offending letter in OPTARG.
while getopts ':aqtrhv' opt; do
  case "$opt" in
    a) rename1; exit ;;
    q) fastqstats; exit ;;
    t) fastq2fasta; exit ;;
    r) fastqtabular; exit ;;
    h) usage; exit ;;
    v) version; exit ;;
    \?) echo "Invalid option: -$OPTARG" >&2; exit 1 ;;
    :) echo "Missing option argument for -$OPTARG" >&2; exit 1 ;;
    *) echo "Unimplemented option: -$OPTARG" >&2; exit 1 ;;
  esac
done

# Reached only when getopts found no option flag at all (e.g. the first
# argument is a file name or "--"), because every arm above exits. Report
# it instead of finishing silently with status 0.
{
  echo "Missing optional argument"
  echo ""
  echo "Options: ./fqtk -h"
  echo ""
} >&2
exit 1
0 commit comments