<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>http://Opengenome.net/index.php?action=history&amp;feed=atom&amp;title=Perl_script_for_ATM</id>
	<title>Perl script for ATM - Revision history</title>
	<link rel="self" type="application/atom+xml" href="http://Opengenome.net/index.php?action=history&amp;feed=atom&amp;title=Perl_script_for_ATM"/>
	<link rel="alternate" type="text/html" href="http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;action=history"/>
	<updated>2026-05-12T12:00:39Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.31.3</generator>
	<entry>
		<id>http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=9904&amp;oldid=prev</id>
		<title>210.218.222.82 at 08:52, 12 May 2006</title>
		<link rel="alternate" type="text/html" href="http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=9904&amp;oldid=prev"/>
		<updated>2006-05-12T08:52:16Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 08:52, 12 May 2006&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;Line 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;&amp;amp;&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;#&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>210.218.222.82</name></author>
		
	</entry>
	<entry>
		<id>http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=9903&amp;oldid=prev</id>
		<title>210.218.222.82 at 08:51, 12 May 2006</title>
		<link rel="alternate" type="text/html" href="http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=9903&amp;oldid=prev"/>
		<updated>2006-05-12T08:51:59Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;amp;diff=9903&amp;amp;oldid=1304&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>210.218.222.82</name></author>
		
	</entry>
	<entry>
		<id>http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=1304&amp;oldid=prev</id>
		<title>211.211.234.134 at 00:33, 26 August 2005</title>
		<link rel="alternate" type="text/html" href="http://Opengenome.net/index.php?title=Perl_script_for_ATM&amp;diff=1304&amp;oldid=prev"/>
		<updated>2005-08-26T00:33:59Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/usr/bin/perl -w&lt;br /&gt;
&lt;br /&gt;
use strict;&lt;br /&gt;
&lt;br /&gt;
doMain();&lt;br /&gt;
&lt;br /&gt;
sub doMain&lt;br /&gt;
{&lt;br /&gt;
    my $baseDirPath = shift @ARGV || &amp;quot;&amp;quot;;&lt;br /&gt;
    my $pirAlignmentFile = shift @ARGV || &amp;quot;&amp;quot;;&lt;br /&gt;
    my $targetProteinCode = shift @ARGV || &amp;quot;&amp;quot;;&lt;br /&gt;
    my $pdbFiles_ref = \@ARGV || ();&lt;br /&gt;
&lt;br /&gt;
    print &amp;quot;----\n&amp;quot;;&lt;br /&gt;
    print &amp;quot;\nChecking command-line parameters ...\n&amp;quot;;&lt;br /&gt;
    printUsageAndExit() unless ( $baseDirPath &amp;amp;&amp;amp; -d $baseDirPath );&lt;br /&gt;
    $baseDirPath =~ s/\/$//; # remove the ending slash if exists&lt;br /&gt;
&lt;br /&gt;
    printUsageAndExit() unless ( $pirAlignmentFile &amp;amp;&amp;amp; -f &amp;quot;$baseDirPath/$pirAlignmentFile&amp;quot; &amp;amp;&amp;amp; $pirAlignmentFile =~ /\.pir$/ );&lt;br /&gt;
    printUsageAndExit() unless ( $targetProteinCode &amp;amp;&amp;amp; length($targetProteinCode) == 4 );&lt;br /&gt;
    printUsageAndExit() unless ( @$pdbFiles_ref &amp;gt; 0 );&lt;br /&gt;
&lt;br /&gt;
    foreach my $pdbFile ( @$pdbFiles_ref )&lt;br /&gt;
    {&lt;br /&gt;
        printUsageAndExit() unless ( -f &amp;quot;$baseDirPath/$pdbFile&amp;quot; &amp;amp;&amp;amp; $pdbFile =~ /\.pdb$/ );&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    print &amp;quot;Making modeller input files ...\n&amp;quot;;&lt;br /&gt;
    makeModellerInput( $baseDirPath, $pirAlignmentFile, $targetProteinCode, $pdbFiles_ref ); &lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub printUsageAndExit&lt;br /&gt;
{&lt;br /&gt;
    print STDERR &amp;quot;Error!\n\n&amp;quot;;&lt;br /&gt;
    print STDERR &amp;quot;Usage: perl -w $0 A1_baseDirectoryPath A2_pirAlignmentFile A3_targetProteinCode A4_pdbFiles\n\n&amp;quot;; &lt;br /&gt;
    print STDERR &amp;quot;    A1_baseDirectoryPath: directory path containing all the PIR alignment file and pdb files\n\n&amp;quot;;&lt;br /&gt;
    print STDERR &amp;quot;    A2_pirAlignmentFile: PIR-format output file (with the extension, '.pir') of multiple sequence alignment including a four-letter code in each header line (e.g. cdk2HumanHomologs)\n\n&amp;quot;;&lt;br /&gt;
    print STDERR &amp;quot;    A3_targetProteinCode: the same four-letter code of your target protein as that in the alignment file (e.g. trgt or mine)\n\n&amp;quot;;&lt;br /&gt;
    print STDERR &amp;quot;    A4_pdbFiles: pdb files (with the extension, '.pdb') separated by blank of which file name consists of the same four-letter code as that in the alignment file (e.g. 1erk.pdb 1p38.pdb)\n\n&amp;quot;;&lt;br /&gt;
    print STDERR &amp;quot;----\n&amp;quot;;&lt;br /&gt;
    exit;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub makeModellerInput&lt;br /&gt;
{&lt;br /&gt;
    my ( $baseDirPath, $pirAlignmentFile, $targetProteinCode, $pdbFiles_ref ) = @_;&lt;br /&gt;
    print &amp;quot;\tchanging the directory to $baseDirPath ...\n&amp;quot;;&lt;br /&gt;
    chdir $baseDirPath or die &amp;quot;Dir Change Error, $baseDirPath: $!&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    my ( $outputPrefix ) = $pirAlignmentFile =~ /^(\S+)\.\S+$/;&lt;br /&gt;
&lt;br /&gt;
    # modeller input files&lt;br /&gt;
    my $alignmentFile = $outputPrefix. &amp;quot;.ali&amp;quot;;&lt;br /&gt;
    my $topScriptFile = $outputPrefix. &amp;quot;.top&amp;quot;;&lt;br /&gt;
    my %pdbCode2AtomFile = get_pdbCode2File( $pdbFiles_ref, &amp;quot;atm&amp;quot; );&lt;br /&gt;
&lt;br /&gt;
    my %pdbCode2PdbFile = get_pdbCode2File( $pdbFiles_ref, &amp;quot;pdb&amp;quot; );&lt;br /&gt;
    my %pdbCode2PirAlignment = get_pdbCode2PirAlignment( $pirAlignmentFile, $targetProteinCode, \%pdbCode2PdbFile );&lt;br /&gt;
&lt;br /&gt;
    # parse the target protein's alignment and information first&lt;br /&gt;
    my $alignmentFileContent = getTargetProteinAlignment( $targetProteinCode, $pdbCode2PirAlignment{$targetProteinCode} );&lt;br /&gt;
&lt;br /&gt;
    # parse alignments and information of known proteins and make atom files of them&lt;br /&gt;
    foreach my $pdbCode ( keys %pdbCode2PdbFile )&lt;br /&gt;
    {&lt;br /&gt;
        my $atomFile = $pdbCode2AtomFile{ $pdbCode };&lt;br /&gt;
        my $pdbFile = $pdbCode2PdbFile{ $pdbCode };&lt;br /&gt;
        my $pirAlignment = $pdbCode2PirAlignment{ $pdbCode };&lt;br /&gt;
&lt;br /&gt;
        # make the atom files&lt;br /&gt;
        print &amp;quot;\tmaking the atom file, $atomFile ...\n&amp;quot;;&lt;br /&gt;
        makeAtomFile( $pdbFile, $atomFile );&lt;br /&gt;
&lt;br /&gt;
        $alignmentFileContent .= get_eachAlignmentContent( $pdbCode, $pirAlignment, $pdbFile );&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    # make the alignment file&lt;br /&gt;
    print &amp;quot;\tmaking the alignment file, $alignmentFile ...\n&amp;quot;;&lt;br /&gt;
    makeAlignmentFile( $alignmentFile, $alignmentFileContent );&lt;br /&gt;
&lt;br /&gt;
    # make the top script file &lt;br /&gt;
    print &amp;quot;\tmaking the top script file, $topScriptFile ...\n&amp;quot;;&lt;br /&gt;
    makeTopScriptFile( $topScriptFile, $alignmentFile, $targetProteinCode, \%pdbCode2AtomFile );&lt;br /&gt;
&lt;br /&gt;
    print &amp;quot;Finished!\n\n&amp;quot;;&lt;br /&gt;
    print &amp;quot;----\n&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_pdbCode2File&lt;br /&gt;
{&lt;br /&gt;
    my ( $pdbFiles_ref, $fileExtension ) = @_;&lt;br /&gt;
    my %pdbCode2File = ();&lt;br /&gt;
&lt;br /&gt;
    foreach my $pdbFile ( @$pdbFiles_ref )&lt;br /&gt;
    {&lt;br /&gt;
        my ( $pdbCode ) = $pdbFile =~ /(\w{4})\.pdb$/; # extract only the four-letters&lt;br /&gt;
        my $file = $pdbCode. &amp;quot;.&amp;quot;. $fileExtension;&lt;br /&gt;
        $pdbCode2File{ $pdbCode } = $file;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    return %pdbCode2File;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_pdbCode2PirAlignment&lt;br /&gt;
{&lt;br /&gt;
    my ( $pirAlignmentFile, $targetProteinCode, $pdbCode2PdbFile_ref ) = @_;&lt;br /&gt;
    my %pdbCode2PirAlignment = ();&lt;br /&gt;
&lt;br /&gt;
    my @pdbCodes = ();&lt;br /&gt;
    push @pdbCodes, $targetProteinCode;&lt;br /&gt;
    push @pdbCodes, (sort keys %$pdbCode2PdbFile_ref );&lt;br /&gt;
&lt;br /&gt;
    $/ = &amp;quot;*\n&amp;quot;; # change the reading-block separator&lt;br /&gt;
    open FI, $pirAlignmentFile or die &amp;quot;File Open Error, $pirAlignmentFile: $!\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    while ( my $entry = &amp;lt;FI&amp;gt; )&lt;br /&gt;
    {&lt;br /&gt;
        chomp $entry;&lt;br /&gt;
        next if $entry =~ /^\s*$/;&lt;br /&gt;
&lt;br /&gt;
        my @lines = split /\n/, $entry;&lt;br /&gt;
&lt;br /&gt;
        my $pdbCode = &amp;quot;&amp;quot;;&lt;br /&gt;
        foreach my $code ( @pdbCodes )&lt;br /&gt;
        {&lt;br /&gt;
            if ( $lines[0] =~ /$code/ )&lt;br /&gt;
            {&lt;br /&gt;
                $pdbCode = $code;&lt;br /&gt;
                last;&lt;br /&gt;
            }&lt;br /&gt;
        }&lt;br /&gt;
&lt;br /&gt;
        if ( $pdbCode eq &amp;quot;&amp;quot; )&lt;br /&gt;
        {&lt;br /&gt;
            print STDERR &amp;quot;PDB code mistmatch with pdb files on this entry!\n&amp;quot;;&lt;br /&gt;
            print STDERR &amp;quot;pdbCode: $pdbCode\n&amp;quot;;&lt;br /&gt;
            print STDERR &amp;quot;entry: $entry\n&amp;quot;;&lt;br /&gt;
        }&lt;br /&gt;
&lt;br /&gt;
        $pdbCode2PirAlignment{ $pdbCode } = &amp;quot;$entry*\n&amp;quot;;&lt;br /&gt;
        print &amp;quot;\nPDB code To PIR alignment found: $pdbCode =&amp;gt;\n$entry\n&amp;quot;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    close FI;&lt;br /&gt;
    $/ = &amp;quot;\n&amp;quot;; # restore the reading-block separator&lt;br /&gt;
&lt;br /&gt;
    return %pdbCode2PirAlignment;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub getTargetProteinAlignment&lt;br /&gt;
{&lt;br /&gt;
    my ( $targetProteinCode, $targetProteinAlignment ) = @_;&lt;br /&gt;
    my $targetProteinAlignmentContent = &amp;quot;&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    my @lines = split /\n/, $targetProteinAlignment;&lt;br /&gt;
    shift @lines; # remove header line&lt;br /&gt;
    shift @lines; # remove blank line&lt;br /&gt;
    my $sequenceAlignmentOnly = join &amp;quot;\n&amp;quot;, @lines;&lt;br /&gt;
&lt;br /&gt;
    $targetProteinAlignmentContent .= &amp;quot;&amp;gt;P1;$targetProteinCode\n&amp;quot;;&lt;br /&gt;
    $targetProteinAlignmentContent .= &amp;quot;sequence: : : : : : : : :\n&amp;quot;;&lt;br /&gt;
    $targetProteinAlignmentContent .= &amp;quot;$sequenceAlignmentOnly\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    return $targetProteinAlignmentContent;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub makeAtomFile&lt;br /&gt;
{&lt;br /&gt;
    my ( $pdbFile, $atomFile ) = @_;&lt;br /&gt;
    !system(&amp;quot;cp $pdbFile $atomFile&amp;quot;) or die &amp;quot;File Copy Error: $!&amp;quot;;&lt;br /&gt;
    print &amp;quot;\tcopying $pdbFile to $atomFile ...\n&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_eachAlignmentContent&lt;br /&gt;
{&lt;br /&gt;
    my ( $pdbCode, $pirAlignment, $pdbFile ) = @_;&lt;br /&gt;
    my $eachAlignmentContent = &amp;quot;&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    my @lines = split /\n/, $pirAlignment;&lt;br /&gt;
    shift @lines; # remove header line&lt;br /&gt;
    shift @lines; # remove blank line&lt;br /&gt;
    pop @lines; # remove ending *&lt;br /&gt;
    my $sequenceAlignmentOnly = join &amp;quot;\n&amp;quot;, @lines;&lt;br /&gt;
    $sequenceAlignmentOnly =~ s/[Xx]/-/g;&lt;br /&gt;
&lt;br /&gt;
    my ( $structureInfoLine, $adjustedAlignmentSequence ) &lt;br /&gt;
        = get_structureInfoAndAdjustedSequence( $pdbCode, $sequenceAlignmentOnly, $pdbFile );&lt;br /&gt;
    $eachAlignmentContent .= &amp;quot;&amp;gt;P1;$pdbCode\n&amp;quot;;&lt;br /&gt;
    $eachAlignmentContent .= $structureInfoLine;&lt;br /&gt;
    $eachAlignmentContent .= &amp;quot;$adjustedAlignmentSequence\n*\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    return $eachAlignmentContent;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_structureInfoAndAdjustedSequence&lt;br /&gt;
{&lt;br /&gt;
    my ( $pdbCode, $sequenceAlignmentOnly, $pdbFile ) = @_;&lt;br /&gt;
    my ( $structureInfoLine, $adjustedAlignmentSequence ) = ( &amp;quot;&amp;quot;, &amp;quot;&amp;quot; );&lt;br /&gt;
&lt;br /&gt;
    my ( $startResNum, $startChain, $endResNum, $endChain ) = (&amp;quot;&amp;quot;, &amp;quot;&amp;quot;, &amp;quot;&amp;quot;, &amp;quot;&amp;quot;);&lt;br /&gt;
    my ( $structureXorN, $proteinName, $source, $resolution, $rFactor ) = (&amp;quot;&amp;quot;, &amp;quot;&amp;quot;, &amp;quot;&amp;quot;, &amp;quot;&amp;quot;, &amp;quot;&amp;quot;);&lt;br /&gt;
    my $allResiduesInPdb = &amp;quot;&amp;quot;;&lt;br /&gt;
    my $includedResNum = 0;&lt;br /&gt;
    my $isChainExist = 1; # true&lt;br /&gt;
        &lt;br /&gt;
&lt;br /&gt;
    open FI, $pdbFile or die &amp;quot;File Open Error, $pdbFile: $!\n&amp;quot;;&lt;br /&gt;
    while ( my $line = &amp;lt;FI&amp;gt; )&lt;br /&gt;
    {&lt;br /&gt;
        chomp $line;&lt;br /&gt;
&lt;br /&gt;
        if ($line =~ /^COMPND.*MOLECULE:\s*(\S.*\S);\s*$/)&lt;br /&gt;
        {&lt;br /&gt;
            $proteinName = $1;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^COMPND.*CHAIN:\s+NULL;\s*$/)&lt;br /&gt;
        {&lt;br /&gt;
            $isChainExist = 0; # false&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /ORGANISM_SCIENTIFIC:\s*(\S.*\S);\s*$/)&lt;br /&gt;
        {&lt;br /&gt;
            $source = $1;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^EXPDTA\s+(\S.*\S)\s*$/)&lt;br /&gt;
        {&lt;br /&gt;
            my $method = $1;&lt;br /&gt;
            $structureXorN = &lt;br /&gt;
                ( $method =~ /X-RAY/ ) ?  &amp;quot;structureX&amp;quot; :&lt;br /&gt;
                    ( $method =~ /NMR/ ) ? &amp;quot;structureN&amp;quot; :&lt;br /&gt;
                        &amp;quot;structure&amp;quot;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^REMARK.*RESOLUTION.\s+([\d\.]+)\s+ANGSTROM.*$/)&lt;br /&gt;
        {&lt;br /&gt;
            $resolution = $1;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^REMARK\s+\d+\s+R VALUE\s+\(WORKING SET\)\s*:\s*([\d\.\-]+)\s*$/)&lt;br /&gt;
        {&lt;br /&gt;
            $rFactor = $1;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^ATOM/)&lt;br /&gt;
        {&lt;br /&gt;
            my @values = split /\s+/, $line;&lt;br /&gt;
            my $threeLetterAminoAcidCode = $values[3];&lt;br /&gt;
            my $oneLetterAminoAcidCode = convertThreeToOneLetterAminoAcidCode($threeLetterAminoAcidCode);&lt;br /&gt;
&lt;br /&gt;
            if ( $startResNum eq &amp;quot;&amp;quot; &amp;amp;&amp;amp; $isChainExist )&lt;br /&gt;
            {&lt;br /&gt;
                ( $startChain, $startResNum ) = ( $values[4], $values[5] );&lt;br /&gt;
            }&lt;br /&gt;
            elsif ( $startResNum eq &amp;quot;&amp;quot; )&lt;br /&gt;
            {&lt;br /&gt;
                $startResNum = $values[4];&lt;br /&gt;
            }&lt;br /&gt;
&lt;br /&gt;
            if ( $isChainExist )&lt;br /&gt;
            {&lt;br /&gt;
                ( $endChain, $endResNum ) = ( $values[4], $values[5] );&lt;br /&gt;
            }&lt;br /&gt;
            else&lt;br /&gt;
            {&lt;br /&gt;
                $endResNum = $values[4];&lt;br /&gt;
            }&lt;br /&gt;
&lt;br /&gt;
            if ( $endResNum ne $includedResNum )&lt;br /&gt;
            {&lt;br /&gt;
                $allResiduesInPdb .= $oneLetterAminoAcidCode;&lt;br /&gt;
                $includedResNum = $endResNum;&lt;br /&gt;
            }&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($line =~ /^TER/) # read only the atom list ( if multi-chain protein, only the first chain is read )&lt;br /&gt;
        {&lt;br /&gt;
            last;&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
    close FI;&lt;br /&gt;
&lt;br /&gt;
    $structureInfoLine = &amp;quot;$structureXorN:$pdbCode:$startResNum :$startChain :$endResNum :$endChain &amp;quot;.&lt;br /&gt;
        &amp;quot;:$proteinName :$source :$resolution :$rFactor \n&amp;quot;;&lt;br /&gt;
    $adjustedAlignmentSequence = get_adjustedAlignmentSequence( $sequenceAlignmentOnly, $allResiduesInPdb );&lt;br /&gt;
&lt;br /&gt;
    return ( $structureInfoLine, $adjustedAlignmentSequence );&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub convertThreeToOneLetterAminoAcidCode&lt;br /&gt;
{&lt;br /&gt;
    my ( $threeLetterAminoAcidCode ) = @_;&lt;br /&gt;
    return&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /gly/i ) ? &amp;quot;G&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /pro/i ) ? &amp;quot;P&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /ala/i ) ? &amp;quot;A&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /val/i ) ? &amp;quot;V&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /leu/i ) ? &amp;quot;L&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /ile/i ) ? &amp;quot;I&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /met/i ) ? &amp;quot;M&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /cys/i ) ? &amp;quot;C&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /phe/i ) ? &amp;quot;F&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /tyr/i ) ? &amp;quot;Y&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /trp/i ) ? &amp;quot;W&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /his/i ) ? &amp;quot;H&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /lys/i ) ? &amp;quot;K&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /arg/i ) ? &amp;quot;R&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /gln/i ) ? &amp;quot;Q&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /asn/i ) ? &amp;quot;N&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /glu/i ) ? &amp;quot;E&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /asp/i ) ? &amp;quot;D&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /ser/i ) ? &amp;quot;S&amp;quot; :&lt;br /&gt;
        ( $threeLetterAminoAcidCode =~ /thr/i ) ? &amp;quot;T&amp;quot; :&lt;br /&gt;
        &amp;quot;-&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_adjustedAlignmentSequence&lt;br /&gt;
{&lt;br /&gt;
    my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_;&lt;br /&gt;
    my $adjustedAlignmentSequence = &amp;quot;&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    my @lines = split /\n/, $sequenceAlignmentOnly;&lt;br /&gt;
    my $charactersPerLine = length $lines[0];&lt;br /&gt;
    $sequenceAlignmentOnly =~ s/\n//g; # remove new line characters&lt;br /&gt;
&lt;br /&gt;
    # substitute front and end residues without atom coordinates to '-'s &lt;br /&gt;
    my $startPos = get_startPos( $sequenceAlignmentOnly, $allResiduesInPdb ); &lt;br /&gt;
    my $endPos = get_endPos( $sequenceAlignmentOnly, $allResiduesInPdb ); &lt;br /&gt;
    $adjustedAlignmentSequence = &amp;quot;-&amp;quot; x ($startPos - 1).&lt;br /&gt;
        substr( $sequenceAlignmentOnly, $startPos - 1, $endPos - $startPos + 1 ). # substr is zero-based&lt;br /&gt;
        &amp;quot;-&amp;quot; x ( length($sequenceAlignmentOnly) - $endPos );&lt;br /&gt;
&lt;br /&gt;
    # substitute loop residues without atom coordinates to '-'s &lt;br /&gt;
    my @loopPositions = get_loopPositions( $adjustedAlignmentSequence, $allResiduesInPdb );&lt;br /&gt;
    foreach my $loopPosition ( @loopPositions )&lt;br /&gt;
    {&lt;br /&gt;
        my ( $loopStartPos, $loopEndPos ) = split /-/, $loopPosition; # e.g. loop residues 45-56&lt;br /&gt;
        $adjustedAlignmentSequence = &lt;br /&gt;
            substr( $adjustedAlignmentSequence, 0, $loopStartPos - 1 ).&lt;br /&gt;
            &amp;quot;-&amp;quot; x ( $loopEndPos - $loopStartPos + 1).&lt;br /&gt;
            substr( $adjustedAlignmentSequence, $loopEndPos, length($adjustedAlignmentSequence) - $loopEndPos );&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    my $dashRemovedAdjustedAlignmentSequence = $adjustedAlignmentSequence;&lt;br /&gt;
    $dashRemovedAdjustedAlignmentSequence =~ s/-//g;&lt;br /&gt;
&lt;br /&gt;
    if ( $dashRemovedAdjustedAlignmentSequence ne $allResiduesInPdb )&lt;br /&gt;
    {&lt;br /&gt;
        print STDERR &amp;quot;\nError!!!\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;Number of residues in the PIR-alignment file and the PDB file mismatches !\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;Substitue residues in the PIR-alignment file without atom coordinates to dashes('-'s) manually!\n\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;dashRemovedAdjustedAlignmentSequence: $dashRemovedAdjustedAlignmentSequence\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;allResiduesInPdb: $allResiduesInPdb\n&amp;quot;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    return formatFastaSequence($adjustedAlignmentSequence, $charactersPerLine);&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_startPos&lt;br /&gt;
{&lt;br /&gt;
    my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_;&lt;br /&gt;
&lt;br /&gt;
    # Hypothesis: if ten residues match in the front end, we can determine the start position in the alignment&lt;br /&gt;
    #             matching with the residues in the PDB file   &lt;br /&gt;
    my $firstTenAminoAcidsOfAllResiduesInPdb = substr( $allResiduesInPdb, 0, 10 );&lt;br /&gt;
&lt;br /&gt;
    my $startPos = &amp;quot;&amp;quot;;&lt;br /&gt;
    my @allAminoAcidsInTheAlignmentSequence = split //, $sequenceAlignmentOnly;&lt;br /&gt;
    my $pos = 0;&lt;br /&gt;
&lt;br /&gt;
    foreach my $aa ( @allAminoAcidsInTheAlignmentSequence )&lt;br /&gt;
    {&lt;br /&gt;
        $pos++;&lt;br /&gt;
        next if $aa eq &amp;quot;-&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
        my $frontRemovedSeq = substr( $sequenceAlignmentOnly, $pos - 1, length($sequenceAlignmentOnly) - $pos + 1 );&lt;br /&gt;
        $frontRemovedSeq =~ s/-//g; # remove all dashes&lt;br /&gt;
&lt;br /&gt;
        if ( $frontRemovedSeq =~ /^$firstTenAminoAcidsOfAllResiduesInPdb/i )&lt;br /&gt;
        {&lt;br /&gt;
            $startPos = $pos;&lt;br /&gt;
            print &amp;quot;\tstart position or non-loop start position determined: $pos\n&amp;quot;;&lt;br /&gt;
            last;&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    if ( $startPos eq &amp;quot;&amp;quot; )&lt;br /&gt;
    {&lt;br /&gt;
        print STDERR &amp;quot;\nError!\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;Start position mismatches!\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;Check the start amino acids of the alignment!\n\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;sequenceAlignment: $sequenceAlignmentOnly\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;residuesInPdb: $allResiduesInPdb\n\n&amp;quot;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    return $startPos;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_endPos&lt;br /&gt;
{&lt;br /&gt;
    my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_;&lt;br /&gt;
&lt;br /&gt;
    # Hypothesis: if ten residues match in the rear end, we can determine the end position in the alignment&lt;br /&gt;
    #             matching with the residues in the PDB file   &lt;br /&gt;
    my $reversedLastTenAminoAcidsOfAllResiduesInPdb = &lt;br /&gt;
        reverseMyString(substr( $allResiduesInPdb, length($allResiduesInPdb) - 10, 10 ));&lt;br /&gt;
&lt;br /&gt;
    my $endPos = &amp;quot;&amp;quot;;&lt;br /&gt;
    my @allAminoAcidsInTheAlignmentSequence = split //, $sequenceAlignmentOnly;&lt;br /&gt;
    my $pos = @allAminoAcidsInTheAlignmentSequence + 1;&lt;br /&gt;
&lt;br /&gt;
    foreach my $aa ( reverse @allAminoAcidsInTheAlignmentSequence )&lt;br /&gt;
    {&lt;br /&gt;
        $pos--;&lt;br /&gt;
        next if $aa eq &amp;quot;-&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
        my $endRemovedReverseSeq = reverseMyString( substr( $sequenceAlignmentOnly, 0, $pos ) );&lt;br /&gt;
        $endRemovedReverseSeq =~ s/-//g; # remove all dashes&lt;br /&gt;
&lt;br /&gt;
        if ( $endRemovedReverseSeq =~ /^$reversedLastTenAminoAcidsOfAllResiduesInPdb/i )&lt;br /&gt;
        {&lt;br /&gt;
            $endPos = $pos;&lt;br /&gt;
            print &amp;quot;\tend position determined: $pos\n&amp;quot;;&lt;br /&gt;
            last;&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    if ( $endPos eq &amp;quot;&amp;quot; )&lt;br /&gt;
    {&lt;br /&gt;
        print STDERR &amp;quot;\nError!\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;End position mismatches between the alignment file and the pdb file!\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;Check the end amino acids of the alignment!\n\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;sequenceAlignmentOnly: $sequenceAlignmentOnly\n&amp;quot;;&lt;br /&gt;
        print STDERR &amp;quot;allResiduesInPdb: $allResiduesInPdb\n\n&amp;quot;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    return $endPos;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub reverseMyString&lt;br /&gt;
{&lt;br /&gt;
    my ( $string ) = @_;&lt;br /&gt;
    my @allChars = split //, $string;&lt;br /&gt;
&lt;br /&gt;
    return join(&amp;quot;&amp;quot;, reverse @allChars );&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub get_loopPositions&lt;br /&gt;
{&lt;br /&gt;
    my ( $adjustedAlignmentSequence, $allResiduesInPdb ) = @_;&lt;br /&gt;
    my @loopPositions = ();&lt;br /&gt;
&lt;br /&gt;
    my $dashRemovedAdjustedSequence = $adjustedAlignmentSequence;&lt;br /&gt;
    $dashRemovedAdjustedSequence =~ s/-//g;&lt;br /&gt;
&lt;br /&gt;
    if ( $dashRemovedAdjustedSequence eq $allResiduesInPdb )&lt;br /&gt;
    {&lt;br /&gt;
        return @loopPositions;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    my @allAminoAcidsInTheAlignmentSequence = split //, $adjustedAlignmentSequence;&lt;br /&gt;
    my @allAminoAcidsInThePdbFile = split //, $allResiduesInPdb;&lt;br /&gt;
    my ( $alignmentResiduePos, $pdbFileResiduePos ) = ( 0, 0 );&lt;br /&gt;
&lt;br /&gt;
    while ( $alignmentResiduePos &amp;lt;= $#allAminoAcidsInTheAlignmentSequence ) # '$#' means the zero-based last entry number&lt;br /&gt;
    {&lt;br /&gt;
        $alignmentResiduePos++;&lt;br /&gt;
        next if $allAminoAcidsInTheAlignmentSequence[$alignmentResiduePos-1] eq &amp;quot;-&amp;quot;;&lt;br /&gt;
        $pdbFileResiduePos++;&lt;br /&gt;
&lt;br /&gt;
        if ( $allAminoAcidsInTheAlignmentSequence[$alignmentResiduePos-1] ne &lt;br /&gt;
                $allAminoAcidsInThePdbFile[$pdbFileResiduePos - 1] )&lt;br /&gt;
        {&lt;br /&gt;
            my $frontRemovedAlignmentSeq = &lt;br /&gt;
                substr( $adjustedAlignmentSequence, &lt;br /&gt;
                        $alignmentResiduePos - 1, &lt;br /&gt;
                        length($adjustedAlignmentSequence) - $alignmentResiduePos + 1 );&lt;br /&gt;
            my $frontRemovedPdbResidueSeq = &lt;br /&gt;
                substr( $allResiduesInPdb,&lt;br /&gt;
                        $pdbFileResiduePos - 1,&lt;br /&gt;
                        length($allResiduesInPdb) - $pdbFileResiduePos + 1 );&lt;br /&gt;
&lt;br /&gt;
            my $loopLength = get_startPos($frontRemovedAlignmentSeq, $frontRemovedPdbResidueSeq) - 1;&lt;br /&gt;
            my ( $loopStartPos, $loopEndPos ) = ( $alignmentResiduePos, $alignmentResiduePos + $loopLength - 1);&lt;br /&gt;
&lt;br /&gt;
            if ( $loopLength ne &amp;quot;&amp;quot; &amp;amp;&amp;amp; $loopStartPos &amp;lt;= $loopEndPos )&lt;br /&gt;
            {&lt;br /&gt;
                push @loopPositions, &amp;quot;$loopStartPos-$loopEndPos&amp;quot;;&lt;br /&gt;
                print &amp;quot;\tloop pisition found: $loopStartPos-$loopEndPos, length=$loopLength\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
                $alignmentResiduePos += $loopLength - 1;&lt;br /&gt;
                $pdbFileResiduePos--;&lt;br /&gt;
            }&lt;br /&gt;
        }&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    return @loopPositions;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub formatFastaSequence&lt;br /&gt;
{&lt;br /&gt;
    my ($seq, $charactersPerLine) = @_;&lt;br /&gt;
    $seq =~ s/(.{$charactersPerLine})/$1\n/g;&lt;br /&gt;
    return $seq;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub makeAlignmentFile&lt;br /&gt;
{&lt;br /&gt;
    my ( $alignmentFile, $alignmentFileContent ) = @_;&lt;br /&gt;
&lt;br /&gt;
    open FO, &amp;quot;&amp;gt;$alignmentFile&amp;quot; or die &amp;quot;File Write Error, $alignmentFile: $!&amp;quot;;&lt;br /&gt;
    print FO $alignmentFileContent;&lt;br /&gt;
    close FO;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub makeTopScriptFile&lt;br /&gt;
{&lt;br /&gt;
    my ( $topScriptFile, $alignmentFile, $targetProteinCode, $pdbCode2AtomFile_ref ) = @_;&lt;br /&gt;
&lt;br /&gt;
    open FO, &amp;quot;&amp;gt;$topScriptFile&amp;quot; or die &amp;quot;File Write Error, $topScriptFile: $!&amp;quot;;&lt;br /&gt;
    print FO printTopScriptInvariableContentFront();&lt;br /&gt;
&lt;br /&gt;
    # print variable contents of the top script file&lt;br /&gt;
    print FO &amp;quot;SET ALNFILE  = '$alignmentFile'      # alignment filename\n&amp;quot;;&lt;br /&gt;
    print FO &amp;quot;SET KNOWNS   = &amp;quot;;&lt;br /&gt;
    foreach my $pdbCode ( sort keys %$pdbCode2AtomFile_ref )&lt;br /&gt;
    {&lt;br /&gt;
        print FO &amp;quot;'$pdbCode' &amp;quot;;&lt;br /&gt;
    }&lt;br /&gt;
    print FO &amp;quot;               # codes of the templates\n&amp;quot;;&lt;br /&gt;
    print FO &amp;quot;SET SEQUENCE = '$targetProteinCode'               # code of the target\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    print FO printTopScriptInvariableContentEnd();&lt;br /&gt;
    close FO;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub printTopScriptInvariableContentFront&lt;br /&gt;
{&lt;br /&gt;
    return &amp;quot;&amp;quot;.&lt;br /&gt;
        &amp;quot;# Homology modelling by the MODELLER TOP routine 'model'.\n&amp;quot;.&lt;br /&gt;
        &amp;quot;\n&amp;quot;.&lt;br /&gt;
        &amp;quot;INCLUDE                             # Include the predefined TOP routines\n&amp;quot;.&lt;br /&gt;
        &amp;quot;\n&amp;quot;.&lt;br /&gt;
        &amp;quot;SET OUTPUT_CONTROL = 1 1 1 1 1      # uncomment to produce a large log file\n&amp;quot;.&lt;br /&gt;
        &amp;quot;\n&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
sub printTopScriptInvariableContentEnd&lt;br /&gt;
{&lt;br /&gt;
    return &amp;quot;&amp;quot;.&lt;br /&gt;
        &amp;quot;SET ATOM_FILES_DIRECTORY = './' # directories for input atom files\n&amp;quot;.&lt;br /&gt;
        &amp;quot;SET STARTING_MODEL= 1               # index of the first model\n&amp;quot;.&lt;br /&gt;
        &amp;quot;SET ENDING_MODEL  = 1               # index of the last model\n&amp;quot;.&lt;br /&gt;
        &amp;quot;                                    # (determines how many models to calculate)\n&amp;quot;.&lt;br /&gt;
        &amp;quot;CALL ROUTINE = 'model'              # do homology modelling\n&amp;quot;.&lt;br /&gt;
        &amp;quot;\n&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;/div&gt;</summary>
		<author><name>211.211.234.134</name></author>
		
	</entry>
</feed>