forked from weizhongli/cdhit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clstr_merge_noorder.pl
executable file
·100 lines (94 loc) · 2.11 KB
/
clstr_merge_noorder.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
# The clusters do not need to appear in the same order in every file,
# but that means all slave clusters have to be read into memory first.
use strict;
use warnings;

# Merge CD-HIT style .clstr files.
#
# Usage: clstr_merge_noorder.pl master.clstr [slave.clstr ...]
#
# Every slave file is read completely: the non-representative lines of each
# cluster are stored under the id of that cluster's representative (the line
# ending in "*").  The master file is then streamed to STDOUT, and after each
# master cluster the stored slave members whose representative matches the
# master cluster's representative are appended, renumbered so the leading
# index continues the master cluster's numbering.

# Return LINE without its trailing line terminator (LF or CRLF).
# A plain chop() would eat the "*" marker of a final line that has no newline.
sub strip_eol {
    my ($line) = @_;
    $line =~ s/\r?\n\z//;
    return $line;
}

# Return the sequence id of a cluster member line such as
# "0\t100aa, >some_id... *", or undef when the line has no such field.
sub member_id {
    my ($line) = @_;
    return $line =~ /(?:aa|nt), >(.+)\.\.\./ ? $1 : undef;
}

# Read every slave cluster file in FILES.
# Returns a hash ref: representative id => array ref of the (terminator-free)
# non-representative member lines collected for it across all files.
# Clusters that consist of a representative only contribute nothing.
# Dies when a file cannot be opened, when a member line cannot be parsed, or
# when a cluster has members but no representative.
sub read_slave_clusters {
    my (@files) = @_;
    my %members_of;

    for my $file (@files) {
        open(my $in, '<', $file) or die "can not open $file: $!";

        my $rep;        # id of the current cluster's representative, if seen
        my @members;    # non-representative lines of the current cluster

        # File the cluster collected so far under its representative.
        my $flush = sub {
            my ($where) = @_;
            return unless @members;
            # defined(), not truth: "0" is a legal sequence id.
            die "format error, no rep before $where" unless defined $rep;
            push @{ $members_of{$rep} }, @members;
        };

        while (my $line = <$in>) {
            if ($line =~ /^>/) {
                # A header line closes the previous cluster.
                $flush->("cluster $line");
                undef $rep;
                @members = ();
                next;
            }

            my $id = member_id($line);
            die "format error at $line\n" unless defined $id;

            $line = strip_eol($line);
            if ($line =~ /\*$/) {
                $rep = $id;
            }
            else {
                push @members, $line;
            }
        }
        $flush->("end of $file\n");
        close($in);
    }

    return \%members_of;
}

# Stream MASTER_FILE to OUT (default STDOUT), appending to each cluster the
# slave members stored in MEMBERS_OF (as returned by read_slave_clusters)
# under that cluster's representative id.
# Every emitted line ends in "\n", including a final master line that had no
# terminator, so appended members always start on their own line.
# Master clusters without a representative are not printed (as before), but a
# warning is now issued on STDERR.
# Dies when the file cannot be opened or a representative line cannot be parsed.
sub merge_master_clusters {
    my ($master_file, $members_of, $out) = @_;
    $out = \*STDOUT unless defined $out;

    open(my $in, '<', $master_file) or die "can not open $master_file: $!";

    my $rep;             # representative id of the current master cluster
    my $text    = '';    # header and member lines of the current cluster
    my $next_no = 0;     # index the next appended member will receive

    # Emit the current master cluster followed by its slave members.
    my $flush = sub {
        if (!defined $rep) {
            if ($text ne '') {
                my ($first) = split /\n/, $text;
                warn "no representative in master cluster, skipped: $first\n";
            }
            return;
        }
        print {$out} $text;
        for my $member (@{ $members_of->{$rep} || [] }) {
            # Renumber a copy so the stored slave data stays untouched.
            (my $renumbered = $member) =~ s/^\d+/$next_no/;
            print {$out} $renumbered, "\n";
            $next_no++;
        }
    };

    while (my $raw = <$in>) {
        my $line = strip_eol($raw);

        if ($line =~ /^>/) {
            $flush->();
            undef $rep;
            $text    = "$line\n";
            $next_no = 0;
            next;
        }

        $text .= "$line\n";
        $next_no++;
        if ($line =~ /\*$/) {
            my $id = member_id($line);
            die "format error $line" unless defined $id;
            $rep = $id;
        }
    }
    $flush->();
    close($in);
    return;
}

# Command-line entry point: first argument is the master cluster file, the
# remaining arguments are the slave cluster files.
sub main {
    my ($master_clstr, @clstr) = @_;
    die "Usage: $0 master.clstr [slave.clstr ...]\n"
        unless defined $master_clstr;

    my $slave_clstr = read_slave_clusters(@clstr);
    merge_master_clusters($master_clstr, $slave_clstr);
    return 0;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;