#!/usr/bin/perl -w
#usage: plantTUCMemAlign.pl AS.alignment
#output is a set of files, one per TUC, each holding that TUC and its cap alignment,
#plus a combined <prefix>.TUC.fasta file holding every TUC consensus sequence

use strict;
# Split an alignment file into one "<TUCname>.txt" file per record and collect
# every record's consensus sequence into "<prefix>.TUC.fasta", where <prefix>
# is the word preceding ".alignment" in the input file name.
if (!$ARGV[0]) {
    print "This program extract the alignment data from the alignment of a TUC with its member ESTs. The results are numbers representing the start and end positions of each EST compared to the consensus sequence.\n\nUsage: TUCMemAlign AlignmentFile\n";
    exit(0);
}

# Derive the output prefix first so a badly named input is rejected before
# any file is written (previously this silently produced ".TUC.fasta").
my ($plant) = $ARGV[0] =~ /(\w+)\.alignment/;
defined $plant
    or die("Input file name '$ARGV[0]' must look like NAME.alignment.\n");

# Read the input as '>'-separated records. The separator change is confined
# to this block so the rest of the script sees the default $/.
my @records;
{
    local $/ = ">";
    open(my $in, '<', $ARGV[0])
        or die("Can not open the file $ARGV[0]: $!\n");
    @records = <$in>;
    close($in);
}

my $fasta_file = "$plant.TUC.fasta";
open(my $fasta, '>', $fasta_file)
    or die("Cannot open $fasta_file for output: $!\n");

foreach my $record (@records) {
    next unless ($record =~ /\w+/);   # skip the empty chunk before the first '>'
    $record =~ s/>$//;                # drop the trailing record separator

    # Put the '>' back in front of the TUC name. The name is only trusted when
    # the substitution succeeds: a failed match does not set $1, so using it
    # unconditionally would name the output file from stale or undefined data.
    unless ($record =~ s/(\w+tuc\S+)/>$1/) {
        warn("Skipping a record without a TUC name.\n");
        next;
    }
    my $name = $1;

    my $record_file = "$name.txt";
    open(my $out, '>', $record_file)
        or die("Cannot open $record_file for output: $!\n");
    print {$out} $record;
    close($out) or die("Cannot finish writing $record_file: $!\n");

    # Gather every line mentioning "consensus" straight from the record rather
    # than shelling out to grep on the file just written; the name never
    # reaches a shell, so metacharacters in it are harmless.
    my $sequence = join('', grep { /consensus/ } split(/^/m, $record));
    $sequence =~ s/consensus//g;
    $sequence =~ s/\W+//g;            # strips whitespace, newlines and '-'
    print {$fasta} ">$name\n$sequence\n";
}

close($fasta) or die("Cannot finish writing $fasta_file: $!\n");
    
    


