-
Notifications
You must be signed in to change notification settings - Fork 63
/
Copy patheventClusterer.py
125 lines (100 loc) · 4.91 KB
/
eventClusterer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# -*- coding: utf-8 -*-
"""
Created on Wed May 25 04:20:00 CEST 2016
@authors: Juan C Entizne, Juan L. Trincado
@email: juancarlos.entizne01[at]estudiant.upf.edu,
juanluis.trincado[at]upf.edu
"""
import os
import logging
from lib.cluster_tools import cluster_analysis
from argparse import ArgumentParser, RawTextHelpFormatter
# Text shown at the top of the help output. RawTextHelpFormatter (below) keeps
# the explicit "\n" line breaks exactly as written here.
description = (
    "Description:\n\n"
    "This tool clusters events that change significantly in at least one pair of conditions, "
    "across multiple conditions.\n"
    "This tool takes as input the .dpsi and .psivec files generated by SUPPA differentialAnalysis method\n"
    "and generates a .clustvec file in which the events have been tagged according to their cluster membership\n"
)

# NOTE(review): add_help=False means no -h/--help option is registered on this
# parser; presumably it is reused as a parent parser elsewhere -- confirm.
parser = ArgumentParser(description=description, formatter_class=RawTextHelpFormatter,
                        add_help=False)

# Input files. nargs=1 makes each value a one-element list, not a bare string.
parser.add_argument('-d', '--dpsi',
                    dest="dpsi",
                    nargs=1,
                    action="store",
                    help="Input file of delta-PSI values (.dpsi format)")

parser.add_argument('-p', '--psivec',
                    dest="psivec",
                    nargs=1,
                    action="store",
                    help="Input file with PSI values (.psivec format)")

# Filtering thresholds applied before clustering.
parser.add_argument('-st', '--sig-threshold',
                    dest="sig_threshold",
                    action="store",
                    type=float,
                    default=0.05,
                    help="P-value cut-off for significant events. (Default: 0.05).")

parser.add_argument('-dt', '--dpsi-threshold',
                    dest="dpsi_threshold",
                    action="store",
                    type=float,
                    default=0.05,
                    help="Lower-bound for the absolute delta PSI value to cluster. (Default: 0.05).")

# Clustering parameters.
parser.add_argument('-e', '--eps',
                    dest="eps",
                    action="store",
                    type=float,
                    default=0.05,
                    help="Maximum (Euclidean) distance (between 0 and 1) to consider two events as members of "
                         "the same cluster. (Default: 0.05).")

# No explicit dest: argparse derives "separation" from the long option name.
parser.add_argument('-s', '--separation',
                    action="store",
                    type=float,
                    default=0,
                    help="Minimum separation for considering two points in different clusters. (Default: 0).")

parser.add_argument('-n', '--min-pts',
                    dest="minpts",
                    action="store",
                    type=int,
                    default=20,
                    help="Minimum number of events required per cluster. (Default: 20).")

# The first help fragment ends with "\n" so that "Options:" starts on its own
# line instead of being glued to the preceding sentence.
parser.add_argument("-m", "--metric", dest="metric", choices=["euclidean", "manhattan", "cosine"],
                    default="euclidean", help="Distance function to be used.\n"
                                              "Options:\n"
                                              "\teuclidean (Default),\n"
                                              "\tmanhattan,\n"
                                              "\tcosine.\n")

# No explicit dest: argparse derives "clustering" from the long option name.
parser.add_argument("-c", "--clustering", choices=["OPTICS", "DBSCAN"],
                    default="DBSCAN", help="Clustering method to use.\n"
                                           "Options:\n"
                                           "\tOPTICS ,\n"
                                           "\tDBSCAN (Default).\n")

# Stored under "indexes" as a list of strings (nargs="*").
parser.add_argument('-g', '--groups',
                    dest="indexes",
                    action="store",
                    required=True,
                    type=str,
                    nargs="*",
                    help="Ranges of column numbers specifying the replicates per condition. "
                         "Column numbers have to be continuous, with no overlapping or missing columns between them. "
                         "Ex: 1-3,4-6")

parser.add_argument('-o', '--output',
                    dest="output",
                    action="store",
                    help="Name of the output file.")
def create_path(lst):
    """Return the paths in ``lst`` with relative entries made absolute.

    Args:
        lst: Iterable of path strings.

    Returns:
        A new list in the same order as ``lst``. Entries for which
        ``os.path.isabs`` is true are kept unchanged; every other entry is
        joined onto the current working directory.
    """
    # os.path.join uses the platform separator and does not double it when the
    # working directory already ends with one (e.g. the filesystem root).
    # os.getcwd() is only evaluated for relative entries.
    return [fl if os.path.isabs(fl) else os.path.join(os.getcwd(), fl) for fl in lst]
def main():
    """Parse the command line and run the cluster analysis.

    Reads the options from the module-level ``parser``, resolves the input
    file paths with ``create_path`` and forwards everything to
    ``cluster_analysis``.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    .dpsi or .psivec input is missing or when ``-g/--groups`` has no value.
    """
    args = parser.parse_args()

    # -d/-p are not declared as required, so they are None when omitted;
    # fail with a usage message instead of a TypeError inside create_path.
    if not args.dpsi or not args.psivec:
        parser.error("both -d/--dpsi and -p/--psivec input files are required")

    # -g is declared with nargs="*", so a bare "-g" yields an empty list and
    # indexing it below would raise IndexError.
    if not args.indexes:
        parser.error("-g/--groups requires at least one value (Ex: 1-3,4-6)")

    # Relative input paths are resolved against the current working directory.
    dpsi_file = create_path(args.dpsi)
    psivec_file = create_path(args.psivec)

    # Only the first value of each list-valued option is used.
    cluster_analysis(dpsi_file[0], psivec_file[0], args.sig_threshold, args.dpsi_threshold, args.eps, args.minpts,
                     args.metric, args.indexes[0], args.clustering, args.separation, args.output)


if __name__ == "__main__":
    main()