25
25
from sourmash .plugins import CommandLinePlugin
26
26
27
27
28
- ###
29
-
28
+ ### utility functions
30
29
31
30
def load_labelinfo_csv (filename ):
31
+ "Load file output by 'sourmash compare --labels-to'"
32
32
with sourmash_args .FileInputCSV (filename ) as r :
33
33
labelinfo = list (r )
34
34
@@ -37,12 +37,15 @@ def load_labelinfo_csv(filename):
37
37
38
38
39
39
def load_categories_csv (filename , labelinfo ):
40
+ "Load categories file, integrate with labelinfo => colors"
40
41
with sourmash_args .FileInputCSV (filename ) as r :
41
42
categories = list (r )
42
43
43
44
category_map = {}
44
45
colors = None
45
46
if categories :
47
+ # first, figure out which column is matching between labelinfo
48
+ # and categories file.
46
49
assert labelinfo
47
50
keys = set (categories [0 ].keys ())
48
51
keys -= {"category" }
@@ -54,19 +57,27 @@ def load_categories_csv(filename, labelinfo):
54
57
key = k
55
58
break
56
59
60
+ # found one? awesome. load in all the categories & assign colors.
61
+
57
62
if key :
58
- category_values = list (set ([row ["category" ] for row in categories ]))
59
- category_values .sort ()
63
+ # get distinct categories
64
+ category_values = set ([row ["category" ] for row in categories ])
65
+ category_values = list (sorted (category_values ))
60
66
67
+ # map to colormap colors
61
68
cat_colors = list (map (plt .cm .tab10 , range (len (category_values ))))
69
+
70
+ # build map of category => color
62
71
category_map = {}
63
72
for v , color in zip (category_values , cat_colors ):
64
73
category_map [v ] = color
65
74
75
+ # build map of key => color
66
76
category_map2 = {}
67
77
for row in categories :
68
78
category_map2 [row [key ]] = category_map [row ["category" ]]
69
79
80
+ # build list of colors
70
81
colors = []
71
82
for row in labelinfo :
72
83
value = row [key ]
@@ -82,7 +93,7 @@ def load_categories_csv(filename, labelinfo):
82
93
83
94
84
95
def load_categories_csv_for_labels (filename , queries ):
85
- "Load a categories CSV that must use label name ."
96
+ "Load a categories CSV that uses the ' label' column ."
86
97
with sourmash_args .FileInputCSV (filename ) as r :
87
98
categories = list (r )
88
99
@@ -91,20 +102,24 @@ def load_categories_csv_for_labels(filename, queries):
91
102
if categories :
92
103
key = "label"
93
104
105
+ # load distinct categories
94
106
category_values = list (set ([row ["category" ] for row in categories ]))
95
107
category_values .sort ()
96
108
109
+ # map categories to color
97
110
cat_colors = list (map (plt .cm .tab10 , range (len (category_values ))))
98
111
category_map = {}
99
112
for v , color in zip (category_values , cat_colors ):
100
113
category_map [v ] = color
101
114
115
+ # map label to color
102
116
category_map2 = {}
103
117
for row in categories :
104
118
label = row [key ]
105
119
cat = row ["category" ]
106
120
category_map2 [label ] = category_map [cat ]
107
121
122
+ # build list of colors
108
123
colors = []
109
124
for label , idx in queries :
110
125
color = category_map2 [label ]
@@ -116,10 +131,9 @@ def load_categories_csv_for_labels(filename, queries):
116
131
117
132
118
133
#
119
- # CLI plugin - supports 'sourmash scripts plot2'
134
+ # CLI plugin code
120
135
#
121
136
122
-
123
137
class Command_Plot2 (CommandLinePlugin ):
124
138
command = "plot2" # 'scripts <command>'
125
139
description = (
@@ -247,11 +261,12 @@ def plot_composite_matrix(
247
261
no_labels = not show_labels ,
248
262
get_leaves = True ,
249
263
)
250
- # ax1.set_xticks([])
251
264
265
+ # draw cut point
252
266
if cut_point is not None :
253
267
ax1 .axvline (x = cut_point , c = "red" , linestyle = "dashed" )
254
268
269
+ # draw matrix
255
270
xstart = 0.45
256
271
width = 0.45
257
272
if not show_labels :
@@ -538,7 +553,7 @@ def main(self, args):
538
553
plt .savefig (args .output_figure )
539
554
540
555
541
- # @CTB unused again...
556
+ # @CTB unused code for sparse matrix foo. Revisit!
542
557
def create_sparse_dissimilarity_matrix (tuples , num_objects ):
543
558
# Initialize matrix in LIL format for efficient setup
544
559
similarity_matrix = lil_matrix ((num_objects , num_objects ))
0 commit comments