#!/usr/bin/perl -w
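#Extract the CAP3 alignments (<dir>.alignment) and the concatenated singlets
#(<dir>.TUS.fasta) from the parallel cluster/cap3 results in directory <dir>.
#The TUC consensus and TUC-to-EST outputs are currently disabled (commented out).
#usage: ./ClusterExtract.pl LEclusters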
use strict;
#variable

# Strip one trailing non-word character (e.g. the "/" in "LEclusters/") so the
# argument can serve both as the directory name and as the output-file prefix.
$ARGV[0] =~ s/\W$// if defined $ARGV[0];

# Date stamp embedded in every TUC name: two-digit year, month and day in
# local time.  The values come from localtime() instead of parsing the output
# of date(1), whose field order depends on the locale and platform and would
# silently yield a wrong (or undefined) month/day/year when it differs.
my ($mday, $mon, $years_since_1900) = (localtime)[3, 4, 5];
my $year  = sprintf('%02d', $years_since_1900 % 100);    # e.g. 2002 -> "02"
my $month = sprintf('%02d', $mon + 1);                   # localtime months are 0-based
my $day   = sprintf('%02d', $mday);


# Refuse to run without a cluster directory: every output name is derived from
# $ARGV[0], so an empty value would create stray files such as ".alignment".
die "usage: $0 <cluster_directory>\n"
    unless defined $ARGV[0] && length $ARGV[0];

# Open the alignment output (truncating any previous run).  The handle
# ALIGNMENT stays open for the rest of the script.  Three-argument open keeps
# special characters in the user-supplied name from being read as a mode.
open(ALIGNMENT, '>', "$ARGV[0].alignment")
    or die "Can not open the alignment file $ARGV[0].alignment: $!";
#open(TUC, ">$ARGV[0].TUC.fasta");
#open(TUC_EST, ">$ARGV[0].TUC_EST.txt");

# The TUS file is built by appending, so a leftover file from an earlier run
# must be removed first.  unlink() avoids passing the name through a shell and
# lets us stop if the stale file cannot be removed (otherwise new sequences
# would be appended to old ones).
if (-e "$ARGV[0].TUS.fasta") {
    unlink("$ARGV[0].TUS.fasta")
        or die "Can not remove old $ARGV[0].TUS.fasta: $!";
}

# Append the complete contents of file $source to file $target (created if it
# does not exist).  Done in Perl rather than via "cat ... >> ..." so that file
# names never pass through a shell and failures are reported.
# Dies with the system error if either file cannot be opened, read or written.
sub append_file {
    my ($source, $target) = @_;

    open(my $in,  '<',  $source) or die "Can not open $source for reading: $!";
    open(my $out, '>>', $target) or die "Can not open $target for appending: $!";
    binmode $in;
    binmode $out;

    my $chunk;
    while (1) {
        my $got = read($in, $chunk, 65536);
        die "Error reading $source: $!" unless defined $got;
        last if $got == 0;                       # end of file
        print {$out} $chunk or die "Error writing $target: $!";
    }

    close($in);
    close($out) or die "Error closing $target: $!";
    return;
}

# Turn one record of a Cap3Align file (the text between two
# "******* Contig" separators) into its output form and return it:
#   * the leading " <n> *****..." remainder of the contig header line is
#     replaced by ">$tucName";
#   * the trailing run of stars plus " Contig" (start of the next header,
#     left on the record by the record separator) is removed;
#   * every "gi|<number>...<sign>" read name is shortened to the GI number,
#     left-justified in 10 columns, followed directly by the +/- sign.
sub format_contig_record {
    my ($record, $tucName) = @_;

    $record =~ s/^[\s\d]*\**/>$tucName/;

    # Anchored to the separator text so that the final record of a file,
    # which has no trailing separator, keeps its last line intact.
    $record =~ s/\*+ Contig\z//;

    # One pass over all read names.  Each name is rewritten from its own
    # capture, so a GI that is a prefix of another (123 vs 1234) can no
    # longer overwrite the longer one.
    $record =~ s/gi\|(\d+)\S*\s*([+-])/sprintf('%-10s%s', $1, $2)/ge;

    return $record;
}

#open cluster data to read
opendir(my $cluster_dir, $ARGV[0])
    or die("Unable to open directory $ARGV[0]: $!");
my @Files = readdir($cluster_dir);
closedir($cluster_dir);

my $plantName = $ARGV[0];
my $alignNum = 0;    # running contig counter across all Cap3Align files

# readdir() returns entries in no particular order; sorting makes the TUC
# numbering and the TUS concatenation order reproducible between runs.
foreach my $File (sort @Files) {
    next if ($File !~ /\w+/);    # skips "." and ".."

    # Singlets files are concatenated into <dir>.TUS.fasta.
    if ($File =~ /singlets$/) {
        append_file("$ARGV[0]/$File", "$ARGV[0].TUS.fasta");
    }

    # Alignment files are split into per-contig records and reformatted.
    # NOTE: the TUC consensus FASTA and the TUC-to-EST membership listing
    # that used to be produced here are disabled; only the alignment and
    # TUS outputs are written.
    next unless $File =~ /Cap3Align$/;

    open(my $align_fh, '<', "$ARGV[0]/$File")
        or die("Can not open align file $ARGV[0]/$File: $!");
    {
        # Read one contig per "line"; local() restores the normal record
        # separator automatically when this block is left.
        local $/ = '******* Contig';
        while (my $record = <$align_fh>) {
            # Only records with a consensus line are detailed alignments;
            # this skips the file preamble and the contig summary section.
            next unless $record =~ /consensus/;

            $alignNum++;
            my $tucName = $plantName . 'tuc' . $year . '-' . $month . '-' . $day
                        . '.' . $alignNum;
            print ALIGNMENT format_contig_record($record, $tucName)
                or die "Error writing $ARGV[0].alignment: $!";
        }
    }
    close($align_fh);
}

# Buffered write errors only surface at close, so check it.
close(ALIGNMENT) or die "Error closing $ARGV[0].alignment: $!";
#close(TUC);
#close(TUC_EST);

#print STDERR "organizing all outputs into $ARGV[0].Result ...\n";
#system("rm -r -f $ARGV[0].Result") if (-e "$ARGV[0].Result");
##system("mkdir $ARGV[0].Result");
#system("mv -f $ARGV[0].alignment $ARGV[1]//");
##system("mv -f $ARGV[0].TUC.fasta $ARGV[1]/");
#system("mv -f $ARGV[0].TUS.fasta $ARGV[1]/");
##system("mv -f $ARGV[0].TUC_EST.txt $ARGV[1]/");


#print STDERR "Done!\n";

