Edit distance problem(By modified LCS):
If input is >>
A
AAAA
Then output is >>
------A
AAAA
where --- means insert. Delete can be handle in same way and replace also.
Complex one is>>
ATCGA
TGAC
out >>
A T C G A -
- T - G A C
Now the code is in perl and is given bellow....
***********************************************************************************
main(@ARGV);
my $co = 0;
my($x,$y);
sub main
{
my @line;
my @a;
my @b;
my $c = 0;
open(MYDATA,"in.txt");
while($line = <MYDATA>)
{
chomp($line);
if($c == 0)
{
@a = split'',$line;
}
else
{
@b = split'',$line;
}
++$c;
}
close MYDATA;
lcs(\@a,\@b);
print"$x\n";
print"$y\n";
}
sub lcs
{
my($s,$t) = @_;
my @a = @{$s};
my @b = @{$t};
my $i;
my $j;
my @mat = [(1...30),[1...30]];
my @tra = [(1...30),(1...30)];
$s = @a;
$t = @b;
my $rt;
# print"@a\n";
# print"@b\n";
if($s > $t)
{
$rt = $s;
}
else
{
$rt = $t;
}
for($i=0;$i<=$s+1;$i++)
{
for($j=0;$j<=$t+1;$j++)
{
$tra[$i][$j] = 0;
}
}
for($i=0;$i<=$rt;$i++)
{
$mat[$i][0] = 0;
$tra[$i][0] = 2;
}
for($i=0;$i<=$rt;$i++)
{
$mat[0][$i] = 0;
$tra[0][$i] = 1;
}
for($i=1;$i<=$s;$i++)
{
for($j=1;$j<=$t;$j++)
{
if($a[$i-1] eq $b[$j-1])
{
$mat[$i][$j] = $mat[$i-1][$j-1]+1;
$tra[$i][$j] = 3;
}
else
{
if($mat[$i-1][$j] >= $mat[$i][$j-1])
{
$mat[$i][$j] = $mat[$i-1][$j];
$tra[$i][$j] = 2;
}
else
{
$mat[$i][$j] = $mat[$i][$j-1];
$tra[$i][$j] = 1;
}
}
}
}
printlcs(\@a,\@b,\@tra,$s,$t);
}
sub printlcs
{
my($s,$t,$r,$i,$j) = @_;
my @a = @{$s};
my @b = @{$t};
my @tra = @{$r};
# print $tra[$i][$j]," ";
if($i == 0 && $j == 0)
{
return $x,$y;
last;
}
elsif($tra[$i][$j] == 3)
{
printlcs(\@a,\@b,\@tra,$i-1,$j-1);
$x = $x.$a[$i-1];
$y = $y.$b[$j-1];
}
elsif($tra[$i][$j] == 2)
{
printlcs(\@a,\@b,\@tra,$i-1,$j);
$x = $x.$a[$i-1];
$y = $y."-";
}
elsif($tra[$i][$j] == 1)
{
printlcs(\@a,\@b,\@tra,$i,$j-1);
$x = $x."-";
$y = $y.$b[$j-1];
}
}
If input is >>
A
AAAA
Then output is >>
------A
AAAA
where --- means insert. Delete can be handle in same way and replace also.
Complex one is>>
ATCGA
TGAC
out >>
A T C G A -
- T - G A C
Now the code is in perl and is given bellow....
***********************************************************************************
main(@ARGV);
my $co = 0;
my($x,$y);
sub main
{
my @line;
my @a;
my @b;
my $c = 0;
open(MYDATA,"in.txt");
while($line = <MYDATA>)
{
chomp($line);
if($c == 0)
{
@a = split'',$line;
}
else
{
@b = split'',$line;
}
++$c;
}
close MYDATA;
lcs(\@a,\@b);
print"$x\n";
print"$y\n";
}
sub lcs
{
my($s,$t) = @_;
my @a = @{$s};
my @b = @{$t};
my $i;
my $j;
my @mat = [(1...30),[1...30]];
my @tra = [(1...30),(1...30)];
$s = @a;
$t = @b;
my $rt;
# print"@a\n";
# print"@b\n";
if($s > $t)
{
$rt = $s;
}
else
{
$rt = $t;
}
for($i=0;$i<=$s+1;$i++)
{
for($j=0;$j<=$t+1;$j++)
{
$tra[$i][$j] = 0;
}
}
for($i=0;$i<=$rt;$i++)
{
$mat[$i][0] = 0;
$tra[$i][0] = 2;
}
for($i=0;$i<=$rt;$i++)
{
$mat[0][$i] = 0;
$tra[0][$i] = 1;
}
for($i=1;$i<=$s;$i++)
{
for($j=1;$j<=$t;$j++)
{
if($a[$i-1] eq $b[$j-1])
{
$mat[$i][$j] = $mat[$i-1][$j-1]+1;
$tra[$i][$j] = 3;
}
else
{
if($mat[$i-1][$j] >= $mat[$i][$j-1])
{
$mat[$i][$j] = $mat[$i-1][$j];
$tra[$i][$j] = 2;
}
else
{
$mat[$i][$j] = $mat[$i][$j-1];
$tra[$i][$j] = 1;
}
}
}
}
printlcs(\@a,\@b,\@tra,$s,$t);
}
sub printlcs
{
my($s,$t,$r,$i,$j) = @_;
my @a = @{$s};
my @b = @{$t};
my @tra = @{$r};
# print $tra[$i][$j]," ";
if($i == 0 && $j == 0)
{
return $x,$y;
last;
}
elsif($tra[$i][$j] == 3)
{
printlcs(\@a,\@b,\@tra,$i-1,$j-1);
$x = $x.$a[$i-1];
$y = $y.$b[$j-1];
}
elsif($tra[$i][$j] == 2)
{
printlcs(\@a,\@b,\@tra,$i-1,$j);
$x = $x.$a[$i-1];
$y = $y."-";
}
elsif($tra[$i][$j] == 1)
{
printlcs(\@a,\@b,\@tra,$i,$j-1);
$x = $x."-";
$y = $y.$b[$j-1];
}
}
No comments:
Post a Comment