From be9114a2a2d325e9b45cc6f14b0918c1b0002b2a Mon Sep 17 00:00:00 2001 From: Toni Hermoso Pulido Date: Thu, 10 Oct 2024 12:46:52 +0200 Subject: [PATCH] Adding nextflow_schema.json and carrying on --- main.nf | 102 +++++++++++++++++++--------------- nextflow_schema.json | 128 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 44 deletions(-) create mode 100644 nextflow_schema.json diff --git a/main.nf b/main.nf index 3b77bb6..5a35471 100644 --- a/main.nf +++ b/main.nf @@ -2,7 +2,7 @@ /* - * Copyright (c) 2019-2020, Centre for Genomic Regulation (CRG) + * Copyright (c) 2019-2024, Centre for Genomic Regulation (CRG) * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this @@ -17,10 +17,13 @@ ExOrthist pipeline for Bioinformatics Core @ CRG @authors Luca Cozzuto Federica Mantica + Toni Hermoso Pulido =========================================================== */ -version = '0.1' +nextflow.enable.dsl=2 + +version = '2.0.0' /* * Input parameters: @@ -30,51 +33,62 @@ params.help = false params.resume = false -log.info """ - -╔╦╗┬ ┬┌─┐ ╔═╗─┐ ┬╔═╗┬─┐┌┬┐┬ ┬┬┌─┐┌┬┐ - ║ ├─┤├┤ ║╣ ┌┴┬┘║ ║├┬┘ │ ├─┤│└─┐ │ - ╩ ┴ ┴└─┘ ╚═╝┴ └─╚═╝┴└─ ┴ ┴ ┴┴└─┘ ┴ - -============================================================================== -annotations (GTF files) : ${params.annotations} -genomes (fasta files) : ${params.genomes} -cluster file (txt files) : ${params.cluster} -pairwise evo distances : ${params.evodists} -long distance parameters : ${params.long_dist} -medium distance parameters : ${params.medium_dist} -short distance parameters : ${params.short_dist} -pre-computed alignments : ${params.prevaln} -alignment number : ${params.alignmentnum} -orthogroup number : ${params.orthogroupnum} -extraexons (e.g. 
from VastDB) : ${params.extraexons} -bona fide orthologous exon pairs : ${params.bonafide_pairs} -orthopairs : ${params.orthopairs} -output (output folder) : ${params.output} -email for notification : ${params.email} - -INFORMATION ABOUT OPTIONS: -The long, medium, short distance cut-offs are in the format: "int_num;ex_seq;ex_len;prot_sim". -Only exon matches respecting all cut-offs are considered homologous. -- int_num (0,1,2): Number of surrounding intron positions required to be conserved. -- ex_seq (from 0 to 1): Minimum sequence similarity % between a - pair of homologous exons and their corresponding upstream and - downstream exons. -- ex_len (from 0 to 1): Maximum size difference between two homologous exons - (as a fraction of either exon). -- prot_sim (from 0 to 1): Minimum sequence similarity over the entire pairwise alignment - for a pair of protein isoforms to be considered for comparison. - -See online README at https://github.com/biocorecrg/ExOrthist for further information about the options. -""" - -if (params.help) { - log.info """ExOrthist v2.0.0""" - log.info """ExOrthist is a Nextflow-based pipeline to obtain groups of exon orthologous at all evolutionary timescales.\n""" - exit 1 +// log.info """ +// +// ╔╦╗┬ ┬┌─┐ ╔═╗─┐ ┬╔═╗┬─┐┌┬┐┬ ┬┬┌─┐┌┬┐ +// ║ ├─┤├┤ ║╣ ┌┴┬┘║ ║├┬┘ │ ├─┤│└─┐ │ +// ╩ ┴ ┴└─┘ ╚═╝┴ └─╚═╝┴└─ ┴ ┴ ┴┴└─┘ ┴ +// +// ============================================================================== +// annotations (GTF files) : ${params.annotations} +// genomes (fasta files) : ${params.genomes} +// cluster file (txt files) : ${params.cluster} +// pairwise evo distances : ${params.evodists} +// long distance parameters : ${params.long_dist} +// medium distance parameters : ${params.medium_dist} +// short distance parameters : ${params.short_dist} +// pre-computed alignments : ${params.prevaln} +// alignment number : ${params.alignmentnum} +// orthogroup number : ${params.orthogroupnum} +// extraexons (e.g. 
from VastDB) : ${params.extraexons} +// bona fide orthologous exon pairs : ${params.bonafide_pairs} +// orthopairs : ${params.orthopairs} +// output (output folder) : ${params.output} +// email for notification : ${params.email} +// +// INFORMATION ABOUT OPTIONS: +// The long, medium, short distance cut-offs are in the format: "int_num;ex_seq;ex_len;prot_sim". +// Only exon matches respecting all cut-offs are considered homologous. +// - int_num (0,1,2): Number of surrounding intron positions required to be conserved. +// - ex_seq (from 0 to 1): Minimum sequence similarity % between a +// pair of homologous exons and their corresponding upstream and +// downstream exons. +// - ex_len (from 0 to 1): Maximum size difference between two homologous exons +// (as a fraction of either exon). +// - prot_sim (from 0 to 1): Minimum sequence similarity over the entire pairwise alignment +// for a pair of protein isoforms to be considered for comparison. +// +// See online README at https://github.com/biocorecrg/ExOrthist for further information about the options. +// """ +// +// if (params.help) { +// log.info """ExOrthist v2.0.0""" +// log.info """ExOrthist is a Nextflow-based pipeline to obtain groups of exon orthologous at all evolutionary timescales.\n""" +// exit 1 +// } + +// Locate the parameter schema. Never reassign `params`: it already holds the +// CLI/config values, and the schema has no top-level "properties" object to read. +def schema = new File("$projectDir/nextflow_schema.json") +if (!schema.exists()) { + log.warn "Parameter schema not found: ${schema}" } + + if (params.resume) exit 1, "Are you making the classical --resume typo? Be careful!!!!
;)" + + if( !workflow.resume ) { println "Removing the output folder" new File("${params.output}").delete() diff --git a/nextflow_schema.json b/nextflow_schema.json new file mode 100644 index 0000000..c44b602 --- /dev/null +++ b/nextflow_schema.json @@ -0,0 +1,128 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/biocorecrg/exorthist/master/nextflow_schema.json", + "title": "ExOrthist pipeline parameters", + "description": "Schema for the parameters of your ExOrthist pipeline", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/Output Options", + "type": "object", + "required": [ + "annotations", + "genomes", + "cluster", + "output" + ], + "properties": { + "annotations": { + "type": "string", + "description": "Path to GTF annotation files", + "format": "file-path" + }, + "genomes": { + "type": "string", + "description": "Path to fasta genome files", + "format": "file-path" + }, + "cluster": { + "type": "string", + "description": "Path to cluster file (txt files)", + "format": "file-path" + }, + "output": { + "type": "string", + "description": "Output folder path", + "format": "directory-path" + } + } + }, + "analysis_parameters": { + "title": "Analysis Parameters", + "type": "object", + "required": [ + "evodists", + "long_dist", + "medium_dist", + "short_dist" + ], + "properties": { + "evodists": { + "type": "string", + "description": "Path to a file with pairwise evolutionary distances", + "format": "file-path" + }, + "long_dist": { + "type": "string", + "description": "Long distance parameters" + }, + "medium_dist": { + "type": "string", + "description": "Medium distance parameters" + }, + "short_dist": { + "type": "string", + "description": "Short distance parameters" + }, + "prevaln": { + "type": "string", + "description": "Pre-computed alignments" + }, + "alignmentnum": { + "type": "integer", + "description": "Alignment number" + }, + "orthogroupnum": { + "type": 
"integer", + "description": "Orthogroup number" + } + } + }, + "additional_data": { + "title": "Additional Data", + "type": "object", + "properties": { + "extraexons": { + "type": "string", + "description": "Extra exons (e.g. from VastDB)", + "format": "file-path" + }, + "bonafide_pairs": { + "type": "string", + "description": "Bona fide orthologous exon pairs", + "format": "file-path" + }, + "orthopairs": { + "type": "string", + "description": "Orthopairs", + "format": "file-path" + } + } + }, + "notification_options": { + "title": "Notification Options", + "type": "object", + "properties": { + "email": { + "type": "string", + "description": "Email for notification", + "format": "email" + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/analysis_parameters" + }, + { + "$ref": "#/definitions/additional_data" + }, + { + "$ref": "#/definitions/notification_options" + } + ] +}