#!/usr/bin/perl -w
#For the RescueMu GSS assembly: cap3 singlets named xxxXYtuc and xxxEtuc may exist; these "pseudo" singlets need to be renamed to ZM_RM_GSStucxxxx

#usage: ./RenameGSSassembly.pl ZM_RM_GSS.TUC.fasta CapAlign.TUS.fasta > ZM_RM_GSS.TUG.fasta
#ZM_RM_GSS.TUC.fasta is the output of plantTUCMemAlign.pl
#CapAlign.TUS.fasta is the output of RescueMuClusterExtract.pl

#It also appends PlasmidContigRename.TUC_GSS.txt to ZM_RM_GSS.TUC_GSS (after saving a copy of the latter as ZM_RM_GSS.TUC_GSS.partial)


use strict;
# Pass 1: echo every record of the TUC FASTA file ($ARGV[0]) to STDOUT with its
# sequence joined onto a single line, and remember the prefix and serial number
# of the last ID of the form ZM_RM_GSStuc<...>.<number>.  $contig/$contigNum are
# read again further down, where renamed records continue that numbering.
# NOTE(review): "last matching ID" is only the right starting point if the TUC
# file is ordered by serial number -- confirm against plantTUCMemAlign.pl output.
print STDERR "Reading TUC ...\n";
my ($contig, $contigNum);
{
    defined $ARGV[0]
        or die "usage: $0 ZM_RM_GSS.TUC.fasta CapAlign.TUS.fasta > ZM_RM_GSS.TUG.fasta\n";

    # One FASTA record per read; local() puts the default line separator back
    # when this block ends.
    local $/ = ">";
    open(my $tuc_fh, '<', $ARGV[0]) or die "Cannot open TUC '$ARGV[0]': $!";
    while (my $record = <$tuc_fh>) {
        chomp $record;                  # drop the trailing ">" separator
        next unless $record =~ /\w/;    # e.g. the empty chunk before the first ">"

        my ($header, @seq_lines) = split(/\n/, $record);
        my ($ID) = $header =~ /(\S+)/;
        unless (defined $ID) {
            warn "Skipping TUC record without an ID\n";
            next;
        }

        my $Seq = join('', @seq_lines);
        $Seq =~ s/\W+//g;               # remove every non-word character (blanks, CR, ...)
        print ">$ID\n$Seq\n";

        # Update only on a successful match: assigning the result of a failed
        # match directly would reset both variables to undef.
        if (my ($prefix, $serial) = $ID =~ /(ZM_RM_GSStuc\S+)\.(\d+)/) {
            ($contig, $contigNum) = ($prefix, $serial);
        }
    }
    close($tuc_fh);
}
# Pass 2: echo every record of the TUS FASTA file ($ARGV[1]) to STDOUT with its
# sequence joined onto a single line.  A record whose ID matches /\S+tuc\d*/ is
# given a new ID "<$contig>.<next serial>", continuing from $contigNum, and one
# tab-separated line (new ID, matched old ID, "+", 1, sequence length) is
# written to PlasmidContigRename.TUC_GSS.txt for it.
{
    defined $ARGV[1]
        or die "usage: $0 ZM_RM_GSS.TUC.fasta CapAlign.TUS.fasta > ZM_RM_GSS.TUG.fasta\n";

    open(my $rename_fh, '>', 'PlasmidContigRename.TUC_GSS.txt')
        or die "Cannot open output for write: $!";
    print STDERR "Reading TUS and rename pesudoTUS ...\n";

    # One FASTA record per read; local() puts the default line separator back
    # when this block ends.
    local $/ = '>';
    open(my $tus_fh, '<', $ARGV[1]) or die "Cannot open $ARGV[1]: $!";
    while (my $record = <$tus_fh>) {
        chomp $record;                  # drop the trailing ">" separator
        next unless $record =~ /\w/;    # e.g. the empty chunk before the first ">"

        my ($header, @seq_lines) = split(/\n/, $record);
        my ($ID) = $header =~ /(\S+)/;
        unless (defined $ID) {
            warn "Skipping TUS record without an ID\n";
            next;
        }

        my $Seq = join('', @seq_lines);
        $Seq =~ s/\W+//g;               # remove every non-word character (blanks, CR, ...)
        my $len = length($Seq);

        # An ID containing "tuc" is actually a plasmid contig and must be renamed.
        if (my ($old_id) = $ID =~ /(\S+tuc\d*)/) {
            # Without a prefix and serial from the TUC pass the new ID would
            # degenerate to ".1", ".2", ...; stop instead of emitting those.
            defined $contig && defined $contigNum
                or die "Cannot rename $ID: no ZM_RM_GSStuc<...>.<number> ID was found in the TUC file\n";
            $contigNum++;
            $ID = $contig . '.' . $contigNum;
            print {$rename_fh} "$ID\t$old_id\t+\t1\t$len\n";
        }
        print ">$ID\n$Seq\n";
    }
    close($tus_fh);

    # Buffered write errors only surface at close, so check it.
    close($rename_fh)
        or die "Cannot finish writing PlasmidContigRename.TUC_GSS.txt: $!";
}

# Final step: keep a copy of ZM_RM_GSS.TUC_GSS as ZM_RM_GSS.TUC_GSS.partial,
# append PlasmidContigRename.TUC_GSS.txt to ZM_RM_GSS.TUC_GSS, then delete the
# temporary rename table.  Every step is checked so that a failed backup or a
# failed append can never be followed by the loss of the rename table.
print STDERR "append PlasmidContigRename.TUC_GSS.txt to ZM_RM_GSS.TUC_GSS\n";
{
    use File::Copy qw(copy);

    my $rename_table = 'PlasmidContigRename.TUC_GSS.txt';
    my $target       = 'ZM_RM_GSS.TUC_GSS';

    if (-e $target) {
        copy($target, "$target.partial")
            or die "Cannot back up $target to $target.partial: $!";
    }
    else {
        # Appending creates the file, exactly as the shell ">>" did; there is
        # simply nothing to back up first.
        warn "$target does not exist; creating it without a .partial backup\n";
    }

    open(my $in_fh,  '<',  $rename_table) or die "Cannot open $rename_table: $!";
    open(my $out_fh, '>>', $target)       or die "Cannot append to $target: $!";
    while (my $line = <$in_fh>) {
        print {$out_fh} $line or die "Cannot write to $target: $!";
    }
    close($in_fh);
    close($out_fh) or die "Cannot finish writing $target: $!";

    # Remove the table only after its contents are safely appended.
    unlink($rename_table) or warn "Could not remove $rename_table: $!\n";
}

