Skip to content

Commit d1d8fb2

Browse files
committed
Migrate to rpy2; demonstrate the column-range parameter (e.g. "2-3") feature request
1 parent 83c513f commit d1d8fb2

File tree

4 files changed

+50
-24
lines changed

4 files changed

+50
-24
lines changed

tools/correlation/cor.py

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
"""
77

88
import sys
9-
from rpy import *
9+
import rpy2.robjects as robjects
10+
r = robjects.r
11+
1012

1113
def stop_err(msg):
1214
sys.stderr.write(msg)
@@ -17,17 +19,25 @@ def main():
1719
assert method in ( "pearson", "kendall", "spearman" )
1820

1921
try:
20-
columns = map( int, sys.argv[3].split( ',' ) )
22+
column_string = sys.argv[3]
23+
columns = list()
24+
for col in column_string.split(','):
25+
if '-' in col:
26+
s, e = col.split('-')
27+
col = list(range(int(s), int(e) + 1))
28+
columns.extend(col)
29+
else:
30+
columns.append(int(col))
2131
except:
2232
stop_err( "Problem determining columns, perhaps your query does not contain a column of numerical data." )
23-
33+
2434
matrix = []
2535
skipped_lines = 0
2636
first_invalid_line = 0
2737
invalid_value = ''
2838
invalid_column = 0
2939

30-
for i, line in enumerate( file( sys.argv[1] ) ):
40+
for i, line in enumerate( open( sys.argv[1] ) ):
3141
valid = True
3242
line = line.rstrip('\n\r')
3343

@@ -60,29 +70,32 @@ def main():
6070
first_invalid_line = i+1
6171

6272
if valid:
63-
matrix.append( row )
73+
matrix += row
6474

6575
if skipped_lines < i:
66-
try:
67-
out = open( sys.argv[2], "w" )
68-
except:
69-
stop_err( "Unable to open output file" )
70-
7176
# Run correlation
7277
try:
73-
value = r.cor( array( matrix ), use="pairwise.complete.obs", method=method )
74-
except Exception, exc:
75-
out.close()
76-
stop_err("%s" %str( exc ))
77-
for row in value:
78-
print >> out, "\t".join( map( str, row ) )
79-
out.close()
78+
fv = robjects.FloatVector(matrix)
79+
m = r['matrix'](fv, ncol=len(columns),byrow=True)
80+
rslt_mat = r.cor(m, use="pairwise.complete.obs", method=method )
81+
value = []
82+
for ri in range(1, rslt_mat.nrow + 1):
83+
row = []
84+
for ci in range(1, rslt_mat.ncol + 1):
85+
row.append(rslt_mat.rx(ri,ci)[0])
86+
value.append(row)
87+
except Exception as exc:
88+
stop_err("%s" % str( exc ))
89+
90+
with open( sys.argv[2], "w" ) as out:
91+
for row in value:
92+
out.write("%s\n" % "\t".join( map( str, row ) ))
8093

8194
if skipped_lines > 0:
8295
msg = "..Skipped %d lines starting with line #%d. " %( skipped_lines, first_invalid_line )
8396
if invalid_value and invalid_column > 0:
8497
msg += "Value '%s' in column %d is not numeric." % ( invalid_value, invalid_column )
85-
print msg
98+
print(msg)
8699

87100
if __name__ == "__main__":
88101
main()

tools/correlation/cor.xml

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,21 @@
11
<tool id="cor2" name="Correlation" version="1.0.1">
22
<description>for numeric columns</description>
33
<requirements>
4-
<requirement type="package" version="1.0.3">rpy</requirement>
4+
<requirement type="package" version="2.9.4">rpy2</requirement>
55
</requirements>
66
<command>
7-
cor.py '$input1' '$out_file1' $numeric_columns $method
7+
python '$__tool_directory__/cor.py'
8+
'$input1'
9+
'$out_file1'
10+
$numeric_columns
11+
$method
812
</command>
913
<inputs>
1014
<param format="tabular" name="input1" type="data" label="Dataset" help="Dataset missing? See TIP below"/>
11-
<param name="numeric_columns" label="Numerical columns" type="data_column" numerical="True" multiple="True" data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
15+
<param name="numeric_columns" label="Numerical columns" type="text" multiple="True"
16+
data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
17+
<!--param name="numeric_columns" label="Numerical columns" type="data_column" numerical="True" multiple="True"
18+
data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" /-->
1219
<param name="method" type="select" label="Method">
1320
<option value="pearson">Pearson</option>
1421
<option value="kendall">Kendall rank</option>
@@ -28,6 +35,12 @@
2835
<param name="method" value="pearson" />
2936
<output name="out_file1" file="cor_out.txt" />
3037
</test>
38+
<test>
39+
<param name="input1" value="cor.tabular" />
40+
<param name="numeric_columns" value="2-3" />
41+
<param name="method" value="pearson" />
42+
<output name="out_file1" file="cor_out.txt" />
43+
</test>
3144
</tests>
3245
<help>
3346

tools/correlation/test-data/cor.tabular

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ Person Height Self Esteem
1818
1 65 4.1
1919
1 67 3.8
2020
1 63 3.4
21-
2 61 3.6
21+
2 61 3.6
tools/correlation/test-data/cor_out.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
1.0 0.730635686279
2-
0.730635686279 1.0
1+
1.0 0.7306356862792351
2+
0.7306356862792351 1.0

0 commit comments

Comments
 (0)