#!/usr/bin/perl -w
#./TrimSeqInfo.pl /PlantGDB/qfdong/GSSassembly/10-26-03assembly/RM/VectorClean/RescueMuGSS TIGRrepeatTrim_RescueMuGSS > Trim.info
#usage: ./ReAssembleTrimContig.pl Trim.info GSScontig_GSS.txt RescueMuGSS RescueMuPlasmidAssembly.fsa


system("rm -r -f trimTUC") if (-e "trimTUC");
system("mkdir trimTUC");
my $cap3 = './cap3Dong';

my %TrimSeq;
open(FILE, "$ARGV[0]") || die("Cannot open");
while(<FILE>){
    my ($gi) = /gi\|(\d+)/;
    $TrimSeq{$gi} = 1;
   # print STDERR "$gi\n";
}
close(FILE);

my %TrimTUG;
my %TUG_GSS;
open(FILE, "$ARGV[1]") || die("Cannot open");
while(<FILE>){
    my ($TUG, $GSS) = /^(\S+)\s+(\S+)/;
    #print STDERR "$GSS\n";
    $TrimTUG{$TUG} = 1 if(exists $TrimSeq{$GSS});
    if(!exists $TUG_GSS{$TUG}){
	$TUG_GSS{$TUG} = $GSS;
    }else{
	$TUG_GSS{$TUG} .= ":$GSS";
    }
}
close(FILE);

my %SEQ; #original unmask seq
$/ = '>';
open(FILE, "$ARGV[2]") || die("Cannot open");
while(<FILE>){
        next unless /\w/;
        my @lines = split(/\n/, $_);
        my $title = shift(@lines);
        my ($ID) = $title =~ /gi\|(\d+)/;
	#print STDERR "$ID\n";
        my $Seq = join('', @lines);
        $Seq =~ s/\W+//g;
        $SEQ{$ID} = $Seq;
}
close(FILE);
$/ = "\n";

$/ = '>';
open(FILE, "$ARGV[3]") || die("Cannot open");
while(<FILE>){
        next unless /\w/;
        my @lines = split(/\n/, $_);
        my $title = shift(@lines);
        my ($ID) = $title =~ /(\S+)/;
#print STDERR "$ID\n";
        my $Seq = join('', @lines);
        $Seq =~ s/\W+//g;
        $SEQ{$ID} = $Seq;
}
close(FILE);
$/ = "\n";


foreach my $TUG (keys %TrimTUG){
    my @memberGSS = split(/:/, $TUG_GSS{$TUG});
    print STDERR "reassemble $TUG\n";
    open(TEMP, ">$TUG.fsa") || die("Cannot open TEMP");
    foreach my $GSS (@memberGSS){
	if(!exists $SEQ{$GSS}){
	    #print STDERR "GSS = $GSS\n";
	    next;
	}
	my $Seq = $SEQ{$GSS};
	print TEMP ">$GSS\n$Seq\n";
    }
    close(TEMP);
    system("$cap3 $TUG.fsa -p 90 -s 700 > $TUG.Cap3Align");
    system("mv $TUG* trimTUC");

}

