Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MetaGWASToolKit updates #45

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 1 addition & 0 deletions PROJECTNAME.PHENOTYPES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PHENOTYPE SAMPLESIZE GWASINPUT
41 changes: 41 additions & 0 deletions SCRIPTS/gsmr/GSMR.plot_effect.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/hpc/local/Rocky8/dhl_ec/software/R-3.6.3/bin/Rscript --vanilla
# SLURM settings
#SBATCH --job-name=gsmr_plotter # Job name
#SBATCH --mail-type=END,FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH [email protected] # Where to send mail
#SBATCH --nodes=1 #run on one node
#SBATCH --ntasks=1 # Run on a single CPU
#SBATCH --mem-per-cpu=10G # Job memory request
#SBATCH --time=1:00:00 # Time limit hrs:min:sec
#SBATCH --output=gsmr_plotter.log # Standard output and error log

phenotype <- Sys.getenv("PHENOTYPE")
trait <- Sys.getenv("TRAIT")
resultdir <- Sys.getenv("resultdir")
#x_label <- trait # TRAIT corresponds to X_LABEL
#y_label <- phenotype # PHENOTYPE corresponds to Y_LABEL
#color_index <- as.integer(Sys.getenv("COLOR"))
print(phenotype)
print(trait)
# Construct the file path using the variables
file_path1 <- sprintf("%s/gsmr/%s.out.eff_plot.gz", resultdir, trait)
source("/hpc/dhl_ec/esmulders/gsmr/gsmr_plot.r")


# Read the data
gsmr_data <- read_gsmr_data(file_path1)
gsmr_summary(gsmr_data)

file_path2 <- sprintf("%s/gsmr/%s_effect.png", resultdir, trait)

png(filename = file_path2, width = 480, height = 480)
# Plotting function
plot_gsmr_effect(gsmr_data, trait, phenotype, colors()[630])

# Close the device to save the plot
dev.off()





139 changes: 139 additions & 0 deletions SCRIPTS/gsmr/_archive/gsmr.analysis (09-07-2024, 15.20).sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/bin/bash

# SLURM settings
#SBATCH --array=1-50 # create 23 jobs from this
#SBATCH --job-name=gsmr # Job name
#SBATCH --mail-type=FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH [email protected] # Where to send mail
#SBATCH --nodes=1 # Run on one node
#SBATCH --ntasks=1 # Run on a single CPU
#SBATCH --mem=128G # Job memory request
#SBATCH --time=1:00:00 # Time limit hrs:min:sec
#SBATCH --output=gsmr.out # Standard output and error log

echo "Starting task" $SLURM_ARRAY_TASK_ID

# Ensure that SLURM_ARRAY_TASK_ID is set
if [ -z "$SLURM_ARRAY_TASK_ID" ]; then
echo "SLURM_ARRAY_TASK_ID is not set."
exit 1
fi


GSMR="/hpc/dhl_ec/esmulders/gsmr"
GCTA="/hpc/local/Rocky8/dhl_ec/software/gcta_v1.94.1"
DIRECTION=0
REF="${GSMR}/gsmr_ref_data.txt"
#PHENO="META_cox_CHDDEAD_MI_ALL"
#TRAIT_FILE="${GSMR}/gsmr_traits.txt"
PHENO=""
TRAIT_FILE=""
#EXPOSURE_FILE="${GSMR}/gsmr_${trait}_${PHENO}_exposure.txt"


# Parsing command-line options
while getopts ":p:t:d:j:r:" opt; do
case ${opt} in
p) PHENO=$OPTARG ;;
t) TRAIT_FILE=$OPTARG ;;
d) DATA=$OPTARG ;;
j) PROJECTNAME=$OPTARG ;;
r) REF=$OPTARG ;;
:) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;;
\?) echo "Invalid option: -$OPTARG" >&2; exit 1 ;;
esac
done

Total=$(wc -w < "$TRAIT_FILE") # Count total amount of IDs (amount of lines in file)
# Check if the current SLURM_ARRAY_TASK_ID is less than Total
if (( SLURM_ARRAY_TASK_ID < $Total )); then
# Your commands here
echo "Processing task ID $SLURM_ARRAY_TASK_ID"
else
# Cancel the job if SLURM_ARRAY_TASK_ID is not valid
echo "Invalid task ID $SLURM_ARRAY_TASK_ID. Cancelling job."
scancel $SLURM_JOB_ID
fi
echo "Analyzing phenotype ${PHENO}"
echo "${REF}"
#TRAIT_FILE="${GSMR}/gsmr_traits.txt"
#output_dir="${DATA}/${PROJECTNAME}/${PHENO}/gsmr"
#mkdir -p "$output_dir"
#echo "$output_dir"
OUTCOME_FILE="${DATA}/${PROJECTNAME}/${PHENO}/gsmr/gsmr_${PHENO}_outcome.txt"
#echo "$TRAIT_FILE"
#echo "$OUTCOME_FILE"
# Write the outcome to the outcome file
echo "$PHENO ${DATA}/${PROJECTNAME}/${PHENO}/input/${PHENO}.cojo.gz" > "$OUTCOME_FILE"

# Step 1: Process each trait file for GSMR analysis
echo "Starting GSMR analysis for each trait..."
#mkdir -p "$GSMR/gsmr_$PHENO"

# while read -r line; do
# # Extract the trait name and file path
# trait=$(echo "$line" | awk '{print $1}')
# file_path=$(echo "$line" | awk '{print $2}')
# EXPOSURE_FILE="${GSMR}/gsmr_${trait}_${PHENO}_exposure.txt"
# # Copy the current trait line to the exposure file
# echo "$line" > "$EXPOSURE_FILE"
#
#
#
# # Define the output file name
# output_file="${GSMR}/gsmr_${PHENO}/gsmr_result_${trait}.out"
#
# # Run GSMR analysis
# echo "Running GSMR analysis for $trait..."
# ${GCTA} --mbfile "$REF" --gsmr-file "$EXPOSURE_FILE" "$OUTCOME_FILE" --gsmr-direction "$DIRECTION" --effect-plot --gsmr2-beta --out "$output_file"
#
# echo "GSMR analysis completed for $trait. Results saved to $output_file."
# done < "$TRAIT_FILE"
#
# # Read the specified line from TRAIT_FILE based on SLURM_ARRAY_TASK_ID
# line=$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$TRAIT_FILE")
#
# # Extract the trait and file path
# trait=$(echo $line | cut -d' ' -f1)
# file_path=$(echo $line | cut -d' ' -f2)

# Create the output directory if it does not exist


# Get the line corresponding to the current SLURM_ARRAY_TASK_ID
line=$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$TRAIT_FILE")

# Logging
echo "Line extracted: $line"

# Extract the trait name and file path
trait=$(echo "$line" | awk '{print $1}')
file_path=$(echo "$line" | awk '{print $2}')



# Logging
echo "Trait: $trait"
echo "File Path: $file_path"
EXPOSURE_FILE="${DATA}/${PROJECTNAME}/${PHENO}/gsmr/${trait}_${PHENO}_exposure.txt"

# Copy the current trait line to the exposure file
echo "$line" > "$EXPOSURE_FILE"

# Logging
echo "Exposure file content:"
cat "$EXPOSURE_FILE"

# Define the output file name
#output_file="${GSMR}/gsmr_${PHENO}/gsmr_result_${trait}.out"
output_file="${DATA}/${PROJECTNAME}/${PHENO}/gsmr/${trait}.out"
echo "output file: ${output_file}"
# Run GSMR analysis directly with the trait line
echo "Running GSMR analysis for $trait..."
${GCTA} --mbfile "${REF}" --gsmr-file "${EXPOSURE_FILE}" "${OUTCOME_FILE}" --gsmr-direction "${DIRECTION}" --effect-plot --gsmr2-beta --out "${output_file}"

echo "GSMR analysis completed for $trait. Results saved to $output_file."

# Delete the individual exposure file
rm "$EXPOSURE_FILE"
echo "Deleted exposure file: $EXPOSURE_FILE"
108 changes: 108 additions & 0 deletions SCRIPTS/gsmr/gsmr.analysis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash

# SLURM settings
#SBATCH --array=1-55 # create jobs from this
#SBATCH --job-name=gsmr # Job name
#SBATCH --mail-type=FAIL # Mail events (NONE, BEGIN, END, FAIL, ALL)
#SBATCH [email protected] # Where to send mail
#SBATCH --nodes=1 # Run on one node
#SBATCH --ntasks=1 # Run on a single CPU
#SBATCH --mem=128G # Job memory request
#SBATCH --time=1:00:00 # Time limit hrs:min:sec
#SBATCH --output=gsmr.out # Standard output and error log

echo "Starting task" $SLURM_ARRAY_TASK_ID

# Ensure that SLURM_ARRAY_TASK_ID is set
if [ -z "$SLURM_ARRAY_TASK_ID" ]; then
echo "SLURM_ARRAY_TASK_ID is not set."
exit 1
fi


GSMR="/hpc/dhl_ec/esmulders/MetaGWASToolKit/SCRIPTS/gsmr"
GCTA="/hpc/local/Rocky8/dhl_ec/software/gcta_v1.94.1"
DIRECTION=0
REF="${GSMR}/gsmr_ref_data.EUR.txt"

PHENOTYPE=""
TRAIT_FILE=""


# Parsing command-line options
while getopts ":p:t:d:j:r:" opt; do
case ${opt} in
p) PHENOTYPE=$OPTARG ;;
t) TRAIT_FILE=$OPTARG ;;
d) DATA=$OPTARG ;;
j) PROJECTNAME=$OPTARG ;;
r) REF=$OPTARG ;;
:) echo "Option -$OPTARG requires an argument." >&2; exit 1 ;;
\?) echo "Invalid option: -$OPTARG" >&2; exit 1 ;;
esac
done

Total=$(wc -w < "$TRAIT_FILE") # Count total amount of IDs (amount of lines in file)
# Check if the current SLURM_ARRAY_TASK_ID is less than Total
if (( SLURM_ARRAY_TASK_ID < $Total )); then
# Your commands here
echo "Processing task ID $SLURM_ARRAY_TASK_ID"
else
# Cancel the job if SLURM_ARRAY_TASK_ID is not valid
echo "Invalid task ID $SLURM_ARRAY_TASK_ID. Cancelling job."
scancel $SLURM_JOB_ID
fi
echo "Analyzing phenotype ${PHENOTYPE}"
echo "${REF}"

mkdir -p "${DATA}/gsmr"
OUTCOME_FILE="${DATA}/gsmr/gsmr_${PHENOTYPE}_outcome.txt"
#echo "$TRAIT_FILE"
#echo "$OUTCOME_FILE"
# Write the outcome to the outcome file
echo "$PHENOTYPE ${DATA}/input/${PHENOTYPE}.cojo.gz" > "$OUTCOME_FILE"

# Step 1: Process each trait file for GSMR analysis
echo "Starting GSMR analysis for each trait..."
#mkdir -p "$GSMR/gsmr_$PHENOTYPE"

# Get the line corresponding to the current SLURM_ARRAY_TASK_ID
line=$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$TRAIT_FILE")

# Logging
echo "Line extracted: $line"

# Extract the trait name and file path
trait=$(echo "$line" | awk '{print $1}')
file_path=$(echo "$line" | awk '{print $2}')


# Logging
echo "Trait: $trait"
echo "File Path: $file_path"
EXPOSURE_FILE="${DATA}/gsmr/${trait}_${PHENOTYPE}_exposure.txt"

# Copy the current trait line to the exposure file
echo "$line" > "$EXPOSURE_FILE"

# Logging
echo "Exposure file content:"
cat "$EXPOSURE_FILE"

# Define the output file name
#output_file="${GSMR}/gsmr_${PHENOTYPE}/gsmr_result_${trait}.out"
output_file="${DATA}/gsmr/${trait}.out"
echo "output file: ${output_file}"
# Run GSMR analysis directly with the trait line
echo "Running GSMR analysis for $trait in $PHENOTYPE"
TRAIT="${trait}"
resultdir="${DATA}"
${GCTA} --mbfile "${REF}" --gsmr-file "${EXPOSURE_FILE}" "${OUTCOME_FILE}" --gsmr-direction "${DIRECTION}" --effect-plot --gsmr2-beta --out "${output_file}"
#
echo "GSMR analysis completed for $trait. Results saved to $output_file."

PHENOTYPE=$PHENOTYPE TRAIT=$trait resultdir=$resultdir Rscript ${GSMR}/GSMR.plot_effect.R

## Delete the individual exposure file
# rm "$EXPOSURE_FILE"
#echo "Deleted exposure file: $EXPOSURE_FILE"
Loading