@@ -449,6 +449,8 @@ def nodes_df(self):
449
449
"time" : ts .nodes_time ,
450
450
"num_mutations" : self .nodes_num_mutations ,
451
451
"ancestors_span" : child_right - child_left ,
452
+ "child_left" : child_left , # FIXME add test for this
453
+ "child_right" : child_right , # FIXME add test for this
452
454
"is_sample" : is_sample ,
453
455
}
454
456
)
@@ -458,6 +460,8 @@ def nodes_df(self):
458
460
"time" : "float64" ,
459
461
"num_mutations" : "int" ,
460
462
"ancestors_span" : "float64" ,
463
+ "child_left" : "float64" ,
464
+ "child_right" : "float64" ,
461
465
"is_sample" : "bool" ,
462
466
}
463
467
)
@@ -584,3 +588,48 @@ def calc_mutations_per_tree(self):
584
588
mutations_per_tree = np .zeros (self .ts .num_trees , dtype = np .int64 )
585
589
mutations_per_tree [unique_values ] = counts
586
590
return mutations_per_tree
591
+
592
def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
    """
    Calculates the average ancestor span in a genomic-time window.

    Rows of ``self.nodes_df`` whose ``ancestors_span`` is ``-inf`` are
    ignored. Every remaining node contributes
    ``min(ancestors_span, win_x_size)`` to each genomic window that its
    ``[child_left, child_right)`` interval overlaps, in the time window
    that contains the node's ``time``.

    :param win_x_size: Width of a genomic window (x-axis); must be positive.
    :param win_y_size: Height of a time window (y-axis); must be positive.
    :return: A DataFrame with one row per (genomic window, time window)
        pair and the columns ``genomic_position`` (window start),
        ``time`` (window start) and ``average_ancestor_span``. Windows
        that no node falls into have a NaN average. The frame is empty
        when no node has a finite span.
    :raises ValueError: If either window size is not positive.
    """
    if win_x_size <= 0 or win_y_size <= 0:
        raise ValueError("win_x_size and win_y_size must be positive")

    nodes_df = self.nodes_df
    nodes_df = nodes_df[nodes_df.ancestors_span != -np.inf]
    # Plain arrays avoid per-element Series lookups (and any dependence on
    # the DataFrame index) inside the loop below.
    lefts = nodes_df.child_left.to_numpy(dtype=float)
    rights = nodes_df.child_right.to_numpy(dtype=float)
    times = nodes_df.time.to_numpy(dtype=float)
    spans = nodes_df.ancestors_span.to_numpy(dtype=float)

    num_x_wins = 0
    num_y_wins = 0
    if spans.size > 0:
        # Bins are indexed by absolute coordinate (bin 0 starts at 0), so
        # the grid has to reach the largest right edge / time; round up so
        # that a trailing partial window is kept.
        num_x_wins = max(1, int(np.ceil(rights.max() / win_x_size)))
        num_y_wins = max(1, int(np.ceil(times.max() / win_y_size)))
    heatmap_sums = np.zeros((num_x_wins, num_y_wins))
    heatmap_counts = np.zeros((num_x_wins, num_y_wins))

    # Half-open range of x bins overlapped by each node: floor the left
    # edge, ceil the right edge so partially covered bins are included.
    x_starts = np.floor(lefts / win_x_size).astype(int)
    x_ends = np.ceil(rights / win_x_size).astype(int)
    # A time that sits exactly on the upper edge of the grid is folded into
    # the last bin; negative values are folded into the first one.
    y_bins = np.clip(
        np.floor(times / win_y_size).astype(int), 0, max(num_y_wins - 1, 0)
    )
    contributions = np.minimum(spans, win_x_size)

    for x_start, x_end, y, value in zip(x_starts, x_ends, y_bins, contributions):
        heatmap_sums[x_start:x_end, y] += value
        heatmap_counts[x_start:x_end, y] += 1

    # Empty cells stay NaN without triggering a divide-by-zero warning.
    avg_spans = np.full(heatmap_sums.shape, np.nan)
    np.divide(heatmap_sums, heatmap_counts, out=avg_spans, where=heatmap_counts > 0)

    indices = np.indices((num_x_wins, num_y_wins))
    x_coords = indices[0] * win_x_size
    y_coords = indices[1] * win_y_size

    df = pd.DataFrame(
        {
            "genomic_position": x_coords.flatten(),
            "time": y_coords.flatten(),
            "average_ancestor_span": avg_spans.flatten(),
        }
    )
    return df.astype(
        {
            "genomic_position": "int",
            "time": "int",
            "average_ancestor_span": "float64",
        }
    )
0 commit comments