Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nf-core/mag and nf-core/funcscan #24

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
46 changes: 46 additions & 0 deletions Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Workflow step that copies nf-core/funcscan results back from the compute cluster.
#
# The package name has to match the module's file name
# (RetrieveFuncscanResultsFromComputeCluster.pm); it was previously spelled
# "RetrieveFunscan...", which left the methods in a package nobody loads by name.
package MicrobiomeWorkflow::Main::RetrieveFuncscanResultsFromComputeCluster;

@ISA = (ReFlow::Controller::WorkflowStepHandle);

use strict;
use warnings;
use ReFlow::Controller::WorkflowStepHandle;
use File::Basename;

# Run the "retrieve funcscan results" step.
#
# Arguments:
#   $self - the workflow step object
#   $test - when true (and not undoing), only write a placeholder file
#   $undo - when true, remove everything directly inside the target directory
#
# Returns whatever the last delegated call (runCmd or copyFromCluster) returns.
sub run {
    my ($self, $test, $undo) = @_;

    my @clusterPathParts = ($self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
    my @targetPathParts  = ($self->getWorkflowDataDir(), $self->getParamValue("targetDir"));
    my $remoteDir = join("/", @clusterPathParts);
    my $localDir  = join("/", @targetPathParts);

    # Undo: clear out the files this step placed in the target directory.
    return $self->runCmd(0, "rm -f $localDir/*") if $undo;

    # Test mode: drop a placeholder instead of talking to the cluster.
    ## TODO a real file name or something here
    return $self->runCmd(0, "echo test > $localDir/test.txt") if $test;

    ## TODO decide what all we need from funcscan and collect it up nicely here
    ## $self->runCmd(0, "fun stuff here");
    my ($from, $to) = ("TODO", "TODO");

    return $self->copyFromCluster($remoteDir, $from, $to, 0);
}

# Names of the step parameters this class reads via getParamValue().
sub getParamDeclaration {
    return qw(clusterDir targetDir);
}

# Config declarations for this step; each entry would be [name, default, description].
# This step currently declares none.
sub getConfigDeclaration {
    return ();
}
1;
48 changes: 48 additions & 0 deletions Main/lib/perl/RetrieveMagResultsFromComputeCluster.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package MicrobiomeWorkflow::Main::RetrieveMagResultsFromComputeCluster;

@ISA = (ReFlow::Controller::WorkflowStepHandle);

use strict;
use warnings;
use ReFlow::Controller::WorkflowStepHandle;
use File::Basename;

# Run the "retrieve mag results" step.
#
# Arguments:
#   $self - the workflow step object
#   $test - when true (and not undoing), only write a placeholder file
#   $undo - when true, remove everything directly inside the target directory
#
# Returns whatever the last delegated call (runCmd or copyFromCluster) returns.
sub run {
    my ($self, $test, $undo) = @_;

    my @clusterPathParts = ($self->getClusterWorkflowDataDir(), $self->getParamValue("clusterDir"));
    my @targetPathParts  = ($self->getWorkflowDataDir(), $self->getParamValue("targetDir"));
    my $remoteDir = join("/", @clusterPathParts);
    my $localDir  = join("/", @targetPathParts);

    # Undo: clear out the files this step placed in the target directory.
    return $self->runCmd(0, "rm -f $localDir/*") if $undo;

    # Test mode: drop a placeholder instead of talking to the cluster.
    ## TODO a real file name or something here
    return $self->runCmd(0, "echo test > $localDir/test.txt") if $test;

    ## TODO decide what all we need from mag and collect it up nicely here
    ## I think we want to return everything except the megahit assemblies ??
    ## we're not planning to load anything for the assemblies, but funcscan will need them as input
    ## $self->runCmd(0, "fun stuff here");
    my ($from, $to) = ("TODO", "TODO");

    return $self->copyFromCluster($remoteDir, $from, $to, 0);
}

# Names of the step parameters this class reads via getParamValue().
sub getParamDeclaration {
    return qw(clusterDir targetDir);
}

# Config declarations for this step; each entry would be [name, default, description].
# This step currently declares none.
sub getConfigDeclaration {
    return ();
}
1;
46 changes: 46 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanParamsFile.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanParamsFile;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write (or, on undo, remove) the nf-params.json file for an nf-core/funcscan run.
#
# Arguments:
#   $self - the workflow step object
#   $test - test-mode flag; accepted but not consulted by this step
#   $undo - when true, delete the params file instead of writing it
#
# Dies if the params file cannot be opened or cleanly closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $paramsFilePath  = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    ## TODO sort all these ref dbs, and double check which ones i need.
    ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything
    ## NOTE(review): "input" below is built from getWorkflowDataDir() (the local path);
    ## confirm that is the path the pipeline will see once the directory is mirrored to the cluster.

    if ($undo) {
        # Remove exactly the file this step creates. (This used to reference an
        # undeclared $configPath, which does not compile under "use strict".)
        $self->runCmd(0, "rm -rf $paramsFilePath");
    } else {
        open(my $paramsFh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";

        # qq() lets the JSON keep its double quotes without backslash escapes.
        print {$paramsFh} qq(
{
"input": "$samplesheetPath",
"outdir": "out",
"run_amp_screening": true,
"run_arg_screening": true,
"run_bgc_screening": true,
"amp_hmmsearch_models": "TODO",
"amp_ampcombi_db": "TODO",
"arg_amrfinderplus_db": "TODO",
"arg_deeparg_data": "TODO",
"bgc_antismash_databases": "TODO",
"bgc_deepbgc_database": "TODO",
"bgc_hmmsearch_models": "TODO"
}
);

        # Buffered write errors only surface at close, so check it.
        close($paramsFh) or die "$! :Can't close config file '$paramsFilePath' after writing";
    }
}

1;

31 changes: 31 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreFuncscanSamplesheet.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanSamplesheet;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Create (or, on undo, remove) the samplesheet.csv for an nf-core/funcscan run.
#
# The samplesheet content is still a TODO: the file is currently created empty,
# after which the sampleToFastq file is deleted.
#
# Arguments:
#   $self - the workflow step object
#   $test - test-mode flag; accepted but not consulted by this step
#   $undo - when true, delete the samplesheet instead of writing it
#
# Dies if the samplesheet cannot be opened or cleanly closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
    # Not used yet; the rows described in the TODO below will point into this directory.
    my $magAnalysisDir = join("/", $self->getWorkflowDataDir(), $self->getParamValue("magAnalysisDir"));
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    if ($undo) {
        # Remove exactly the file this step creates. (This used to reference an
        # undeclared $configPath, which does not compile under "use strict".)
        $self->runCmd(0, "rm -rf $samplesheetPath");
    } else {
        open(my $samplesheetFh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";

        # TODO write the header here, then
        # loop through the sampleToFastq file and get sample names
        # for each sample, write a row to the samplesheet that points to the associated mag output file

        close($samplesheetFh) or die "$! :Can't close config file '$samplesheetPath' after writing";

        ## remove the sampleToFastq file
        $self->runCmd(0, "rm -rf $sampleToFastqPath"); ## TODO make sure we want to do this, vs ignore its existence
    }
}

1;
51 changes: 51 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreMagParamsFile.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagParamsFile;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write (or, on undo, remove) the nf-params.json file for an nf-core/mag run.
#
# Arguments:
#   $self - the workflow step object
#   $test - test-mode flag; accepted but not consulted by this step
#   $undo - when true, delete the params file instead of writing it
#
# Dies if the params file cannot be opened or cleanly closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $paramsFilePath  = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "nf-params.json");
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");
    my $krakenDBPath    = join("/", $self->getWorkflowDataDir(), $self->getParamValue("krakenDBPath"));

    # Any non-empty string other than "0" is true in Perl, so a literal "false"
    # would be read as paired. Treat it as false explicitly.
    # NOTE(review): assumes isPaired may arrive as the text "true"/"false" - confirm against the workflow XML.
    my $isPairedParam = $self->getParamValue("isPaired");
    my $isPaired = ($isPairedParam && lc($isPairedParam) ne "false") ? 1 : 0;
    my $singleEnd = $isPaired ? "false" : "true";

    ## TODO compare these args to what i tested on pmacs and make sure i didnt miss anything
    ## TODO figure out for sure which of these ref dbs we need and update them
    ## NOTE(review): "input" and "kraken2_db" below are built from getWorkflowDataDir() (the local path);
    ## confirm that is the path the pipeline will see once the directory is mirrored to the cluster.

    if ($undo) {
        # Remove exactly the file this step creates. (This used to reference an
        # undeclared $configPath, which does not compile under "use strict".)
        $self->runCmd(0, "rm -rf $paramsFilePath");
    } else {
        open(my $paramsFh, ">", $paramsFilePath) or die "$! :Can't open config file '$paramsFilePath' for writing";

        # qq() lets the JSON keep its double quotes without backslash escapes.
        print {$paramsFh} qq(
{
"input": "$samplesheetPath",
"outdir": "out",
"kraken2_db": "$krakenDBPath",
"single_end": $singleEnd,
"cat_db": "TODO",
"gtdb_db": "TODO",
"gtdb_mash": "TODO",
"skip_spades": true,
"skip_spadeshybrid": true,
"run_virus_identification": true,
"skip_concoct": true,
"metaeuk_db": "TODO",
"busco_db": "TODO",
"checkm_db": "TODO",
"gunc_db": "TODO"
}
);

        # Buffered write errors only surface at close, so check it.
        close($paramsFh) or die "$! :Can't close config file '$paramsFilePath' after writing";
    }
}

1;

33 changes: 33 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCoreMagSamplesheet.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreMagSamplesheet;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Create (or, on undo, remove) the samplesheet.csv for an nf-core/mag run.
#
# The samplesheet content is still a TODO: the file is currently created empty,
# after which the sampleToFastq file is deleted.
#
# Arguments:
#   $self - the workflow step object
#   $test - test-mode flag; accepted but not consulted by this step
#   $undo - when true, delete the samplesheet instead of writing it
#
# Dies if the samplesheet cannot be opened or cleanly closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $sampleToFastqPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("sampleToFastqFileName"));
    my $samplesheetPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), "samplesheet.csv");

    if ($undo) {
        # Remove exactly the file this step creates. (This used to reference an
        # undeclared $configPath, which does not compile under "use strict".)
        $self->runCmd(0, "rm -rf $samplesheetPath");
    } else {
        open(my $samplesheetFh, ">", $samplesheetPath) or die "$! :Can't open config file '$samplesheetPath' for writing";

        # TODO write the header here, then
        # loop through the sampleToFastq file and reformat rows
        # write out the reformatted rows to the samplesheet
        # i need to go look at these two files yet to see what needs doing here

        close($samplesheetFh) or die "$! :Can't close config file '$samplesheetPath' after writing";

        ## remove the sampleToFastq file
        $self->runCmd(0, "rm -rf $sampleToFastqPath");
    }
}

1;

53 changes: 53 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeNfCorePMACSConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCorePMACSConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

# Write (or, on undo, remove) the Nextflow config file used for nf-core runs on PMACS.
#
# The generated file sets the process executor/queue, params.max_memory,
# enables singularity, and enables tower with the given token and workspace id.
#
# Arguments:
#   $self - the workflow step object
#   $test - test-mode flag; accepted but not consulted by this step
#   $undo - when true, delete the config file instead of writing it
#
# Dies if the config file cannot be opened or cleanly closed.
sub run {
    my ($self, $test, $undo) = @_;

    my $configPath = join("/", $self->getWorkflowDataDir(), $self->getParamValue("analysisDir"), $self->getParamValue("configFileName"));

    my $memoryInGb = $self->getParamValue("memoryInGb");
    my $nextflowTowerAccessToken = $self->getParamValue("nextflowTowerAccessToken");
    my $nextflowTowerWorkspaceId = $self->getParamValue("nextflowTowerWorkspaceId");

    # not sure we need to grab these, since this is pmacs specific :(
    # id like to look into a pmacs config file here: https://nf-co.re/configs
    my $executor = $self->getClusterExecutor();
    my $queue = $self->getClusterQueue();

    if ($undo) {
        $self->runCmd(0, "rm -rf $configPath");
    } else {
        # The gigabyte count is deliberately written with an MB suffix; the
        # comment lines embedded in the generated config explain the PMACS workaround.
        my $configText =
"process {
executor = '$executor'
queue = '$queue'
}
// nf-core requests memory in kb
// pmacs wants requests for memory in mb
// therefore, request for 6mb will look like 6gb to pmacs.
// having to do this trick makes this config specific to pmacs, unfortunately.
params {
max_memory = $memoryInGb.MB
}
singularity {
enabled = true
}
tower {
accessToken = '$nextflowTowerAccessToken'
workspaceId = '$nextflowTowerWorkspaceId'
enabled = true
}";

        # Lexical handle instead of the package-global bareword F.
        open(my $configFh, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";
        print {$configFh} $configText;
        # Buffered write errors only surface at close, so check it.
        close($configFh) or die "$! :Can't close config file '$configPath' after writing";
    }
}

1;

53 changes: 53 additions & 0 deletions Main/lib/xml/workflow/runNfCoreFunscanOnCluster.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<workflowGraph name="runNfCoreFuncscanOnCluster">
  <!-- Builds the nextflow config, samplesheet and params file for nf-core/funcscan,
       mirrors the analysis dir to the cluster, runs the pipeline, and retrieves the results. -->
  <param name="memoryInGb"/>
  <param name="nextflowTowerAccessToken"/>
  <param name="nextflowTowerWorkspaceId"/>
  <param name="sampleToFastqFileName"/>
  <param name="analysisDir"/>
  <param name="resultDir"/>
  <param name="magAnalysisDir"/>
  <param name="pipelineVersion"/>

  <!-- Variable references below all use the $$name$$ form; several were missing the closing $$. -->
  <step name="makeNfCoreFuncscanPMACSConfig" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCorePMACSConfig">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
    <paramValue name="clusterResultDir">$$analysisDir$$/results</paramValue>
    <paramValue name="configFileName">nextflow.config</paramValue>
    <paramValue name="memoryInGb">$$memoryInGb$$</paramValue>
    <paramValue name="nextflowTowerAccessToken">$$nextflowTowerAccessToken$$</paramValue>
    <paramValue name="nextflowTowerWorkspaceId">$$nextflowTowerWorkspaceId$$</paramValue>
  </step>

  <step name="makeNfCoreFuncscanSamplesheet" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanSamplesheet">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
    <paramValue name="sampleToFastqFileName">$$sampleToFastqFileName$$</paramValue>
    <paramValue name="magAnalysisDir">$$magAnalysisDir$$</paramValue>
  </step>

  <!-- TODO also some ref dbs to manage here -->
  <step name="makeNfCoreFuncscanParamsFile" stepClass="MicrobiomeWorkflow::Main::WorkflowSteps::MakeNfCoreFuncscanParamsFile">
    <paramValue name="analysisDir">$$analysisDir$$</paramValue>
  </step>

  <step name="mirrorToCluster" stepClass="MicrobiomeWorkflow::Main::MirrorToComputeCluster" stepLoadTypes="toCluster">
    <paramValue name="fileOrDirToMirror">$$analysisDir$$</paramValue>
    <depends name="makeNfCoreFuncscanPMACSConfig"/>
    <depends name="makeNfCoreFuncscanSamplesheet"/>
    <depends name="makeNfCoreFuncscanParamsFile"/>
  </step>

  <step name="runClusterTask" stepClass="ReFlow::StepClasses::RunAndMonitorNextflow">
    <paramValue name="workingDir">$$analysisDir$$</paramValue>
    <paramValue name="resultsDir">$$analysisDir$$/results</paramValue>
    <paramValue name="nextflowConfigFile">$$analysisDir$$/nextflow.config</paramValue>
    <paramValue name="nextflowParamsFile">$$analysisDir$$/nf-params.json</paramValue>
    <paramValue name="nextflowWorkflow">nf-core/funcscan</paramValue>
    <paramValue name="isNfCoreWorkflow">true</paramValue>
    <paramValue name="pipelineVersion">$$pipelineVersion$$</paramValue>
    <depends name="mirrorToCluster"/>
  </step>

  <!-- stepClass matches the module file Main/lib/perl/RetrieveFuncscanResultsFromComputeCluster.pm -->
  <step name="retrieveNfCoreFuncscanResultsFromComputeCluster" stepClass="MicrobiomeWorkflow::Main::RetrieveFuncscanResultsFromComputeCluster" stepLoadTypes="fromCluster">
    <paramValue name="clusterDir">$$analysisDir$$/results</paramValue>
    <paramValue name="targetDir">$$resultDir$$</paramValue>
    <depends name="runClusterTask"/>
  </step>
</workflowGraph>

Loading