#!/usr/bin/perl -w
# Extract the alignment and TUS (singlet) outputs from the parallel
# cluster/cap3 results.
#
# Usage: ./ClusterExtract.pl LEclusters
#
# The single argument names a directory of cluster results.  Two kinds of
# files inside it are processed, selected by file-name suffix:
#
#   *singlets   appended verbatim to "<arg>.TUS.fasta"
#   *Cap3Align  split into records on the literal text '******* Contig';
#               every record containing "consensus" is renamed, has its GI
#               member names shortened, and is written to "<arg>.alignment"
#
# The TUC consensus FASTA and TUC_EST membership outputs are intentionally
# not produced: they were already disabled in the original script, whose
# notes say the consensus extraction was wrong (consensus lines can contain
# '-', so matching them with \w+ is not sufficient).

use strict;
use warnings;

die "usage: $0 <cluster-directory>\n" unless @ARGV;

# Drop one trailing non-word character from the argument (for example the
# '/' left behind by shell tab-completion of a directory name).
my $clusterDir = $ARGV[0];
$clusterDir =~ s/\W$//;

my $plantName     = $clusterDir;
my $alignmentPath = "$clusterDir.alignment";
my $tusPath       = "$clusterDir.TUS.fasta";

# Two-digit "yy-mm-dd" stamp used in every contig name.  Built from
# localtime() rather than by parsing the output of date(1), whose field
# order depends on the locale.
my ($mday, $mon, $yearsSince1900) = (localtime)[3, 4, 5];
my $dateStamp = sprintf('%02d-%02d-%02d',
                        $yearsSince1900 % 100, $mon + 1, $mday);

open(my $alignmentFh, '>', $alignmentPath)
    or die "Can not open the alignment file $alignmentPath: $!";

# Singlets are appended file by file below, so start from a clean slate.
if (-e $tusPath) {
    unlink($tusPath) or die "Can not remove old $tusPath: $!";
}

opendir(my $dirFh, $clusterDir)
    or die "Unable to open directory $clusterDir: $!";
# readdir() returns entries in unspecified order; sort them so contig
# numbering is reproducible from run to run.
my @files = sort readdir($dirFh);
closedir($dirFh);

my $alignNum = 0;    # running contig counter across all Cap3Align files

foreach my $file (@files) {
    next if $file !~ /\w+/;    # skips "." and ".."

    if ($file =~ /singlets$/) {
        append_file("$clusterDir/$file", $tusPath);
    }

    if ($file =~ /Cap3Align$/) {
        my $alignPath = "$clusterDir/$file";
        open(my $alignFh, '<', $alignPath)
            or die "Can not open align file $file: $!";

        # Read one contig per record instead of one line per record.
        # Localised so the separator is restored when this block exits.
        local $/ = '******* Contig';

        while (defined(my $record = <$alignFh>)) {
            # Records without a consensus (the title / first description
            # part of the file) are not alignments.
            next unless $record =~ /consensus/;

            $alignNum++;
            my $tucName = $plantName . 'tuc' . $dateStamp . '.' . $alignNum;

            # Replace the leading contig number and asterisks by the new
            # FASTA-style name.
            $record =~ s/^[\s\d]*\**/>$tucName/;
            # Blank out the final line of the record, which normally holds
            # the asterisks and separator text belonging to the next
            # contig header.
            $record =~ s/\**.*$//;

            # Shorten each "gi|<number>|... <strand>" member name to the
            # bare number, left-justified in 10 columns, followed by the
            # strand sign.  A single pass is used so that a GI which is a
            # prefix of another GI cannot overwrite the longer one.
            $record =~ s/gi\|(\d+)\S*\s*([+-])/sprintf('%-10s%s', $1, $2)/ge;

            print {$alignmentFh} $record
                or die "Can not write to $alignmentPath: $!";
        }

        close($alignFh);
    }
}

close($alignmentFh) or die "Can not close $alignmentPath: $!";

# Append the bytes of file $src to file $dst, creating $dst if necessary.
# Dies if either file cannot be opened or the copy cannot be completed.
sub append_file {
    my ($src, $dst) = @_;

    open(my $in, '<', $src) or die "Can not open singlet file $src: $!";
    open(my $out, '>>', $dst) or die "Can not open $dst for appending: $!";
    binmode($in);
    binmode($out);

    local $/ = \65536;    # fixed-size reads; content is copied untouched
    while (defined(my $chunk = <$in>)) {
        print {$out} $chunk or die "Can not write to $dst: $!";
    }

    close($in);
    close($out) or die "Can not close $dst: $!";
    return;
}