Changes

From Opengenome.net

Perl script for ATM

4,305 bytes added, 17:51, 12 May 2006
no edit summary
<&lt;pre>#!/usr/bin/perl -w <br /><br />use strict; <br /><br />doMain(); <br /><br />sub doMain<br />{ <br /> my $baseDirPath = shift @ARGV || ""&quot;&quot; ;<br /> my $pirAlignmentFile = shift @ARGV || ""&quot;&quot;; <br /> my $targetProteinCode = shift @ARGV || ""&quot; my &quot;;<br /> my $pdbFiles_ref = \@ARGV || ();  <br /><br /> print "&quot;----\n"&quot;; <br /> print "&quot;\nChecking command-line parameters ...\n"&quot; ;<br /> printUsageAndExit() unless ( $baseDirPath &amp;& amp; -d $baseDirPath ); <br /> $baseDirPath =~ s/\/$//; # remove the ending slash if exists  <br /><br /> printUsageAndExit() unless ( $pirAlignmentFile &amp;& amp; -f "&quot;$baseDirPath/$pirAlignmentFile" &quot; &amp;& amp; $pirAlignmentFile =~ /\.pir$/ ); <br /> printUsageAndExit() unless ( $targetProteinCode &amp;& amp; length($targetProteinCode) == 4 ); <br /> printUsageAndExit() unless ( @$pdbFiles_ref > &gt; 0 );  <br /><br /> foreach my $pdbFile ( @$pdbFiles_ref ) <br /> { <br /> printUsageAndExit() unless ( -f "&quot;$baseDirPath/$pdbFile" &quot; & amp;&amp; $pdbFile =~ /\.pdb$/ ); <br /> print "Making modeller <br /><br /> print &quot;Making modeller input files ...\n"&quot; ;<br /> makeModellerInput( $baseDirPath, $pirAlignmentFile, $targetProteinCode, $pdbFiles_ref ); <br /><br /><br />sub printUsageAndExit<br />{ <br /> print STDERR "&quot;Error!\n\n"&quot; ;<br /> print STDERR "&quot;Usage: perl -w $0 A1_baseDirectoryPath A2_pirAlignmentFile A3_targetProteinCode A4_pdbFiles\n\n"&quot; ; <br /> print STDERR " &quot; A1_baseDirectoryPath: directory path containing all the PIR alignment file and pdb files\n\n"&quot;; <br /> print STDERR " &quot; A2_pirAlignmentFile: PIR-format output file (with the extension, '.pir') of multiple sequence alignment including a four-letter code in each header line (e.g. cdk2HumanHomologs)\n\n"&quot;; <br /> print STDERR " &quot; A3_targetProteinCode: the same four-letter code of your target protein as that in the alignment file (e.g. trgt or mine)\n\n"&quot; ;<br /> print STDERR " &quot; A4_pdbFiles: pdb files (with the extension, '.pdb') separated by blank of which file name consists of the same four-letter code as that in the alignment file (e.g. 1erk.pdb 1p38.pdb)\n\n"&quot;; <br /> print STDERR "&quot;----\n"&quot; ;<br /> exit;<br />sub makeModellerInput<br /><br />sub makeModellerInput<br />{ <br /> my ( $baseDirPath, $pirAlignmentFile, $targetProteinCode, $pdbFiles_ref ) = @_; <br /> print "&quot;\tchanging the directory to $baseDirPath ...\n"&quot;; <br /> chdir $baseDirPath or die "&quot;Dir Change Error, $baseDirPath: $!"&quot ;<br /><br /> my ( $outputPrefix ) = $pirAlignmentFile =~ /^(\S+)\.\S+$/;  <br /><br /> # modeller input files <br /> my $alignmentFile = $outputPrefix. "&quot;.ali"&quot;; <br /> my $topScriptFile = $outputPrefix. "&quot;.top"&quot;; <br /> my %pdbCode2AtomFile = get_pdbCode2File( $pdbFiles_ref, "&quot;atm" &quot; );  <br /><br /> my %pdbCode2PdbFile = get_pdbCode2File( $pdbFiles_ref, "&quot;pdb" &quot; ); <br /> my %pdbCode2PirAlignment = get_pdbCode2PirAlignment( $pirAlignmentFile, $targetProteinCode, \%pdbCode2PdbFile );  <br /><br /> # parse the target protein's alignment and information first <br /> my $alignmentFileContent = getTargetProteinAlignment( $targetProteinCode, $pdbCode2PirAlignment{$targetProteinCode} );  <br /><br /> # parse alignments and information of known proteins and make atom files of them <br /> foreach my $pdbCode ( keys %pdbCode2PdbFile ) <br /> { <br /> my $atomFile = $pdbCode2AtomFile{ $pdbCode }; <br /> my $pdbFile = $pdbCode2PdbFile{ $pdbCode }; <br /> my $pirAlignment = $pdbCode2PirAlignment{ $pdbCode };  <br /><br /> # make the atom files <br /> print "&quot;\tmaking the atom file, $atomFile ...\n"&quot; ;<br /> makeAtomFile( $pdbFile, $atomFile );  <br /><br /> $alignmentFileContent .= get_eachAlignmentContent( $pdbCode, $pirAlignment, $pdbFile ); }  <br /> }<br /><br /> # make the alignment file <br /> print "&quot;\tmaking the alignment file, $alignmentFile ...\n"&quot;; <br /> makeAlignmentFile( $alignmentFile, $alignmentFileContent );  <br /><br /> # make the top script file <br /> print "&quot;\tmaking the top script file, $topScriptFile ...\n"&quot; ;<br /> makeTopScriptFile( $topScriptFile, $alignmentFile, $targetProteinCode, \%pdbCode2AtomFile );  print "Finished<br /><br /> print &quot;Finished!\n\n"&quot;; <br /> print "&quot;----\n"&quot;;<br /><br /><br />sub get_pdbCode2File<br />{ <br /> my ( $pdbFiles_ref, $fileExtension ) = @_; <br /> my %pdbCode2File = ();  <br /><br /> foreach my $pdbFile ( @$pdbFiles_ref ) { <br /> {<br /> my ( $pdbCode ) = $pdbFile =~ /(\w{4})\.pdb$/; # extract only the four-letters <br /> my $file = $pdbCode. "&quot;."&quot;. $fileExtension; <br /> $pdbCode2File{ $pdbCode } = $file; <br /> <br /><br /> return %pdbCode2File;<br /><br /><br />sub get_pdbCode2PirAlignment<br />{ <br /> my ( $pirAlignmentFile, $targetProteinCode, $pdbCode2PdbFile_ref ) = @_; my <br /> my %pdbCode2PirAlignment = ();  <br /><br /> my @pdbCodes = (); <br /> push @pdbCodes, $targetProteinCode; <br /> push @pdbCodes, (sort keys %$pdbCode2PdbFile_ref );  <br /><br /> $/ = "&quot;*\n"&quot;; # change the reading-block separator <br /> open FI, $pirAlignmentFile or die "&quot;File Open Error, $pirAlignmentFile: $!\n"&quot ;<br /><br /> while ( my $entry = <FIfi> ) { <br /> {<br /> chomp $entry; <br /> next if $entry =~ /^\s*$/;  <br /><br /> my @lines = split /\n/, $entry;  <br /><br /> my $pdbCode = ""&quot; &quot;;<br /> foreach my $code ( @pdbCodes ) <br /> { <br /> if ( $lines[0] =~ /$code/ ) <br /> { <br /> $pdbCode = $code; <br /> last; <br /> } <br /> <br /><br /> if ( $pdbCode eq "" &quot;&quot; ) <br /> { <br /> print STDERR "&quot;PDB code mistmatch with pdb files on this entry!\n"&quot;; <br /> print STDERR "&quot;pdbCode: $pdbCode\n"&quot; ;<br /> print STDERR "&quot;entry: $entry\n"&quot; ;<br /> <br /><br /> $pdbCode2PirAlignment{ $pdbCode } = "&quot;$entry*\n"&quot;; <br /> print "&quot;\nPDB code To PIR alignment found: $pdbCode =>&gt;\n$entry\n"&quot;; <br /> <br /><br /> close FI; <br /> $/ = "&quot;\n"&quot;; # restore the reading-block separator  <br /><br /> return %pdbCode2PirAlignment;} <br />}<br /><br />sub getTargetProteinAlignment<br />{ <br /> my ( $targetProteinCode, $targetProteinAlignment ) = @_; <br /> my $targetProteinAlignmentContent = ""&quot;&quot ;<br /><br /> my @lines = split /\n/, $targetProteinAlignment; <br /> shift @lines; # remove header line <br /> shift @lines; # remove blank line <br /> my $sequenceAlignmentOnly = join "&quot;\n"&quot;, @lines;  <br /><br /> $targetProteinAlignmentContent .= ">&quot;&gt;P1;$targetProteinCode\n"&quot; ;<br /> $targetProteinAlignmentContent .= "&quot;sequence: : : : : : : : :\n"&quot;; <br /> $targetProteinAlignmentContent .= "&quot;$sequenceAlignmentOnly\n"&quot; <br /><br /> return $targetProteinAlignmentContent;<br /><br /><br />sub makeAtomFile<br />{ <br /> my ( $pdbFile, $atomFile ) = @_; <br /> !system("&quot;cp $pdbFile $atomFile"&quot;) or die "&quot;File Copy Error: $!"&quot;; <br /> print "&quot;\tcopying $pdbFile to $atomFile ...\n"&quot;;<br /><br /><br />sub get_eachAlignmentContent<br />{ <br /> my ( $pdbCode, $pirAlignment, $pdbFile ) = @_; <br /> my $eachAlignmentContent = ""&quot &quot;;<br /><br /> my @lines = split /\n/, $pirAlignment; <br /> shift @lines; # remove header line <br /> shift @lines; # remove blank line <br /> pop @lines; # remove ending * <br /> my $sequenceAlignmentOnly = join "&quot;\n"&quot;, @lines; <br /> $sequenceAlignmentOnly =~ s/[Xx]/-/g;  <br /><br /> my ( $structureInfoLine, $adjustedAlignmentSequence ) <br /> = get_structureInfoAndAdjustedSequence( $pdbCode, $sequenceAlignmentOnly, $pdbFile ); $<br /> $eachAlignmentContent .= ">&quot;&gt;P1;$pdbCode\n"&quot;; <br /> $eachAlignmentContent .= $structureInfoLine; <br /> $eachAlignmentContent .= "&quot;$adjustedAlignmentSequence\n*\n"&quot; <br /><br /> return $eachAlignmentContent;<br /><br /><br />sub get_structureInfoAndAdjustedSequence<br />{ <br /> my ( $pdbCode, $sequenceAlignmentOnly, $pdbFile ) = @_; my <br /> my ( $structureInfoLine, $adjustedAlignmentSequence ) = ( ""&quot;&quot;, "" &quot;&quot; );  <br /><br /> my ( $startResNum, $startChain, $endResNum, $endChain ) = (""&quot;&quot;, ""&quot;&quot;, ""&quot;&quot;, ""&quot;&quot;); <br /> my ( $structureXorN, $proteinName, $source, $resolution, $rFactor ) = (""&quot;&quot;, ""&quot;&quot;, ""&quot;&quot;, ""&quot;&quot;, ""&quot;&quot;); <br /> my $allResiduesInPdb = ""&quot;&quot; ;<br /> my $includedResNum = 0; <br /> my $isChainExist = 1; # true   <br /> <br /><br /> open FI, $pdbFile or die "&quot;File Open Error, $pdbFile: $!\n"&quot; ;<br /> while ( my $line = <FIfi> ) <br /> { <br /> chomp $line;  <br /><br /> if ($line =~ /^COMPND.*MOLECULE:\s*(\S.*\S);\s*$/) <br /> { <br /> $proteinName = $1; <br /> } <br /> elsif ($line =~ /^COMPND.*CHAIN:\s+NULL;\s*$/) <br /> { <br /> $isChainExist = 0; # false <br /> } <br /> elsif ($line =~ /ORGANISM_SCIENTIFIC:\s*(\S.*\S);\s*$/) <br /> { <br /> $source = $1; <br /> } <br /> elsif ($line =~ /^EXPDTA\s+(\S.*\S)\s*$/) <br /> { <br /> my $method = $1; <br /> $structureXorN = <br /> ( $method =~ /X-RAY/ ) ? "&quot;structureX" &quot; : <br /> ( $method =~ /NMR/ ) ? "&quot;structureN" &quot; : "<br /> &quot;structure"&quot; ;<br /> } <br /> elsif ($line =~ /^REMARK.*RESOLUTION.\s+([\d\.]+)\s+ANGSTROM.*$/) <br /> { <br /> $resolution = $1; <br /> } <br /> elsif ($line =~ /^REMARK\s+\d+\s+R VALUE\s+\(WORKING SET\)\s*:\s*([\d\.\-]+)\s*$/) <br /> { <br /> $rFactor = $1; <br /> } <br /> elsif ($line =~ /^ATOM/) <br /> { <br /> my @values = split /\s+/, $line; <br /> my $threeLetterAminoAcidCode = $values[3]; <br /> my $oneLetterAminoAcidCode = convertThreeToOneLetterAminoAcidCode($threeLetterAminoAcidCode);  <br /><br /> if ( $startResNum eq "" &quot;& $quot; &amp;&amp; $isChainExist ) <br /> { <br /> ( $startChain, $startResNum ) = ( $values[4], $values[5] ); <br /> } <br /> elsif ( $startResNum eq "" &quot;&quot; ) <br /> { <br /> $startResNum = $values[4]; <br /> <br /><br /> if ( $isChainExist ) <br /> { <br /> ( $endChain, $endResNum ) = ( $values[4], $values[5] ); <br /> } <br /> else <br /> { <br /> $endResNum = $values[4]; <br /> <br /><br /> if ( $endResNum ne $includedResNum ) <br /> { <br /> $allResiduesInPdb .= $oneLetterAminoAcidCode; <br /> $includedResNum = $endResNum; <br /> } <br /> } elsif <br /> elsif ($line =~ /^TER/) # read only the atom list ( if multi-chain protein, only the first chain is read ) <br /> { <br /> last; <br /> } <br /> } <br /> close FI;  <br /><br /> $structureInfoLine = "&quot;$structureXorN:$pdbCode:$startResNum :$startChain :$endResNum :$endChain "&quot;. "<br /> &quot;:$proteinName :$source :$resolution :$rFactor \n"&quot;; <br /> $adjustedAlignmentSequence = get_adjustedAlignmentSequence( $sequenceAlignmentOnly, $allResiduesInPdb );  return ( <br /><br /> return ( $structureInfoLine, $adjustedAlignmentSequence );<br /><br /><br />sub convertThreeToOneLetterAminoAcidCode<br />{ <br /> my ( $threeLetterAminoAcidCode ) = @_; <br /> return <br /> ( $threeLetterAminoAcidCode =~ /gly/i ) ? "&quot;G" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /pro/i ) ? "&quot;P" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /ala/i ) ? "&quot;A" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /val/i ) ? "&quot;V" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /leu/i ) ? "&quot;L" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /ile/i ) ? "&quot;I" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /met/i ) ? "&quot;M" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /cys/i ) ? "&quot;C" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /phe/i ) ? "&quot;F" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /tyr/i ) ? "&quot;Y" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /trp/i ) ? "&quot;W" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /his/i ) ? "&quot;H" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /lys/i ) ? "&quot;K" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /arg/i ) ? "&quot;R" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /gln/i ) ? "&quot;Q" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /asn/i ) ? "&quot;N" &quot; : ( <br /> ( $threeLetterAminoAcidCode =~ /glu/i ) ? "&quot;E" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /asp/i ) ? "&quot;D" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /ser/i ) ? "&quot;S" &quot; : <br /> ( $threeLetterAminoAcidCode =~ /thr/i ) ? "&quot;T" &quot; : "<br /> &quot;-"&quot;;<br /><br /><br />sub get_adjustedAlignmentSequence<br />{ <br /> my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_; <br /> my $adjustedAlignmentSequence = ""&quot;&quot; <br /><br /> my @lines = split /\n/, $sequenceAlignmentOnly; <br /> my $charactersPerLine = length $lines[0]; <br /> $sequenceAlignmentOnly =~ s/\n//g; # remove new line characters  <br /><br /> # substitute front and end residues without atom coordinates to '-'s <br /> my $startPos = get_startPos( $sequenceAlignmentOnly, $allResiduesInPdb ); <br /> my $endPos = get_endPos( $sequenceAlignmentOnly, $allResiduesInPdb ); <br /> $adjustedAlignmentSequence = "&quot;-" &quot; x ($startPos - 1). <br /> substr( $sequenceAlignmentOnly, $startPos - 1, $endPos - $startPos + 1 ). # substr is zero-based "<br /> &quot;-" &quot; x ( length($sequenceAlignmentOnly) - $endPos );  <br /><br /> # substitute loop residues without atom coordinates to '-'s <br /> my @loopPositions = get_loopPositions( $adjustedAlignmentSequence, $allResiduesInPdb ); <br /> foreach my $loopPosition ( @loopPositions ) <br /> { <br /> my ( $loopStartPos, $loopEndPos ) = split /-/, $loopPosition; # e.g. loop residues 45-56 <br /> $adjustedAlignmentSequence = <br /> substr( $adjustedAlignmentSequence, 0, $loopStartPos - 1 ). "-" <br /> &quot;-&quot; x ( $loopEndPos - $loopStartPos + 1). <br /> substr( $adjustedAlignmentSequence, $loopEndPos, length($adjustedAlignmentSequence) - $loopEndPos ); <br /> <br /><br /> my $dashRemovedAdjustedAlignmentSequence = $adjustedAlignmentSequence; <br /> $dashRemovedAdjustedAlignmentSequence =~ s/-//g;  <br /><br /> if ( $dashRemovedAdjustedAlignmentSequence ne $allResiduesInPdb ) <br /> { <br /> print STDERR "&quot;\nError!!!\n"&quot; ;<br /> print STDERR "&quot;Number of residues in the PIR-alignment file and the PDB file mismatches !\n"&quot; ;<br /> print STDERR "&quot;Substitue residues in the PIR-alignment file without atom coordinates to dashes('-'s) manually!\n\n"&quot;; <br /> print STDERR "&quot;dashRemovedAdjustedAlignmentSequence: $dashRemovedAdjustedAlignmentSequence\n"&quot;; <br /> print STDERR "&quot;allResiduesInPdb: $allResiduesInPdb\n"&quot; ;<br /> <br /><br /> return formatFastaSequence($adjustedAlignmentSequence, $charactersPerLine);<br /><br /><br />sub get_startPos<br />{ <br /> my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_;  <br /><br /> # Hypothesis: if ten residues match in the front end, we can determine the start position in the alignment <br /> # matching with the residues in the PDB file <br /> my $firstTenAminoAcidsOfAllResiduesInPdb = substr( $allResiduesInPdb, 0, 10 );  <br /><br /> my $startPos = ""&quot;&quot; ;<br /> my @allAminoAcidsInTheAlignmentSequence = split //, $sequenceAlignmentOnly; <br /> my $pos = 0;  <br /><br /> foreach my $aa ( @allAminoAcidsInTheAlignmentSequence ) { <br /> {<br /> $pos++; <br /> next if $aa eq "&quot;-"&quot; <br /><br /> my $frontRemovedSeq = substr( $sequenceAlignmentOnly, $pos - 1, length($sequenceAlignmentOnly) - $pos + 1 ); <br /> $frontRemovedSeq =~ s/-//g; # remove all dashes  <br /><br /> if ( $frontRemovedSeq =~ /^$firstTenAminoAcidsOfAllResiduesInPdb/i ) <br /> { <br /> $startPos = $pos; <br /> print "&quot;\tstart position or non-loop start position determined: $pos\n"&quot; ;<br /> last; <br /> } <br /> <br /><br /> if ( $startPos eq "" &quot;&quot; ) <br /> { <br /> print STDERR "&quot;\nError!\n"&quot; ;<br /> print STDERR "&quot;Start position mismatches!\n"&quot; ;<br /> print STDERR "&quot;Check the start amino acids of the alignment!\n\n"&quot; ;<br /> print STDERR "&quot;sequenceAlignment: $sequenceAlignmentOnly\n"&quot;; <br /> print STDERR "&quot;residuesInPdb: $allResiduesInPdb\n\n"&quot;; <br /> <br /><br /> return $startPos;<br /><br /><br />sub get_endPos<br />{ <br /> my ( $sequenceAlignmentOnly, $allResiduesInPdb ) = @_;  <br /><br /> # Hypothesis: if ten residues match in the rear end, we can determine the end position in the alignment # <br /> # matching with the residues in the PDB file <br /> my $reversedLastTenAminoAcidsOfAllResiduesInPdb = <br /> reverseMyString(substr( $allResiduesInPdb, length($allResiduesInPdb) - 10, 10 ));  <br /><br /> my $endPos = ""&quot;&quot;; <br /> my @allAminoAcidsInTheAlignmentSequence = split //, $sequenceAlignmentOnly; <br /> my $pos = @allAminoAcidsInTheAlignmentSequence + 1;  <br /><br /> foreach my $aa ( reverse @allAminoAcidsInTheAlignmentSequence ) <br /> { <br /> $pos--; <br /> next if $aa eq "&quot;-"&quot; <br /><br /> my $endRemovedReverseSeq = reverseMyString( substr( $sequenceAlignmentOnly, 0, $pos ) ); <br /> $endRemovedReverseSeq =~ s/-//g; # remove all dashes  <br /><br /> if ( $endRemovedReverseSeq =~ /^$reversedLastTenAminoAcidsOfAllResiduesInPdb/i ) <br /> { <br /> $endPos = $pos; <br /> print "&quot;\tend position determined: $pos\n"&quot;; <br /> last; <br /> } <br /> if <br /><br /> if ( $endPos eq "" &quot;&quot; ) <br /> { <br /> print STDERR "&quot;\nError!\n"&quot;; <br /> print STDERR "&quot;End position mismatches between the alignment file and the pdb file!\n"&quot;; <br /> print STDERR "&quot;Check the end amino acids of the alignment!\n\n"&quot; ;<br /> print STDERR "&quot;sequenceAlignmentOnly: $sequenceAlignmentOnly\n"&quot; ;<br /> print STDERR "&quot;allResiduesInPdb: $allResiduesInPdb\n\n"&quot; ;<br /> <br /><br /> return $endPos;<br /><br /><br />sub reverseMyString<br />{ <br /> my ( $string ) = @_; <br /> my @allChars = split //, $string;  <br /><br /> return join(""&quot;&quot;, reverse @allChars );<br /><br /><br />sub get_loopPositions<br />{ <br /> my ( $adjustedAlignmentSequence, $allResiduesInPdb ) = @_; <br /> my @loopPositions = ();  <br /><br /> my $dashRemovedAdjustedSequence = $adjustedAlignmentSequence; <br /> $dashRemovedAdjustedSequence =~ s/-//g;  <br /><br /> if ( $dashRemovedAdjustedSequence eq $allResiduesInPdb ) <br /> { <br /> return @loopPositions; <br /> <br /><br /> my @allAminoAcidsInTheAlignmentSequence = split //, $adjustedAlignmentSequence; <br /> my @allAminoAcidsInThePdbFile = split //, $allResiduesInPdb; <br /> my ( $alignmentResiduePos, $pdbFileResiduePos ) = ( 0, 0 );  <br /><br /> while ( $alignmentResiduePos <&lt;= $#allAminoAcidsInTheAlignmentSequence ) # '$#' means the zero-based last entry number <br /> { <br /> $alignmentResiduePos++; <br /> next if $allAminoAcidsInTheAlignmentSequence[$alignmentResiduePos-1] eq "&quot;-"&quot; ;<br /> $pdbFileResiduePos++;  <br /><br /> if ( $allAminoAcidsInTheAlignmentSequence[$alignmentResiduePos-1] ne <br /> $allAminoAcidsInThePdbFile[$pdbFileResiduePos - 1] ) <br /> { <br /> my $frontRemovedAlignmentSeq = <br /> substr( $adjustedAlignmentSequence, <br /> $alignmentResiduePos - 1, <br /> length($adjustedAlignmentSequence) - $alignmentResiduePos + 1 ); <br /> my $frontRemovedPdbResidueSeq = <br /> substr( $allResiduesInPdb, <br /> $pdbFileResiduePos - 1, <br /> length($allResiduesInPdb) - $pdbFileResiduePos + 1 );  <br /><br /> my $loopLength = get_startPos($frontRemovedAlignmentSeq, $frontRemovedPdbResidueSeq) - 1; <br /> my ( $loopStartPos, $loopEndPos ) = ( $alignmentResiduePos, $alignmentResiduePos + $loopLength - 1);  if <br /><br /> if ( $loopLength ne "" &quot;&quot; &amp;& amp; $loopStartPos <&lt;= $loopEndPos ) <br /> { <br /> push @loopPositions, "&quot;$loopStartPos-$loopEndPos"&quot; ;<br /> print "&quot;\tloop pisition found: $loopStartPos-$loopEndPos, length=$loopLength\n"&quot ;<br /><br /> $alignmentResiduePos += $loopLength - 1; <br /> $pdbFileResiduePos--; <br /> } } <br /> }<br /> <br /><br /> return @loopPositions;<br /><br /><br />sub formatFastaSequence<br />{ <br /> my ($seq, $charactersPerLine) = @_; <br /> $seq =~ s/(.{$charactersPerLine})/$1\n/g; <br /> return $seq;<br /><br /><br />sub makeAlignmentFile<br />{ <br /> my ( $alignmentFile, $alignmentFileContent ) = @_;  <br /><br /> open FO, ">&quot;&gt;$alignmentFile" &quot; or die "&quot;File Write Error, $alignmentFile: $!"&quot; ;<br /> print FO $alignmentFileContent; <br /> close FO;<br /><br /><br />sub makeTopScriptFile<br />{ my <br /> my ( $topScriptFile, $alignmentFile, $targetProteinCode, $pdbCode2AtomFile_ref ) = @_;  <br /><br /> open FO, ">&quot;&gt;$topScriptFile" &quot; or die "&quot;File Write Error, $topScriptFile: $!"&quot;; <br /> print FO printTopScriptInvariableContentFront();  <br /><br /> # print variable contents of the top script file <br /> print FO "&quot;SET ALNFILE = '$alignmentFile' # alignment filename\n"&quot; ;<br /> print FO "&quot;SET KNOWNS = "&quot; ;<br /> foreach my $pdbCode ( sort keys %$pdbCode2AtomFile_ref ) <br /> { <br /> print FO "&quot;'$pdbCode' "&quot; ;<br /> } <br /> print FO " &quot; # codes of the templates\n"&quot; ;<br /> print FO "&quot;SET SEQUENCE = '$targetProteinCode' # code of the target\n"&quot ;<br /><br /> print FO printTopScriptInvariableContentEnd(); close <br /> close FO;<br /><br /><br />sub printTopScriptInvariableContentFront<br />{ <br /> return ""&quot;&quot;. "<br /> &quot;# Homology modelling by the MODELLER TOP routine 'model'.\n"&quot;. "<br /> &quot;\n"&quot;. "<br /> &quot;INCLUDE # Include the predefined TOP routines\n"&quot;. "\<br /> &quot;\n"&quot;. "<br /> &quot;SET OUTPUT_CONTROL = 1 1 1 1 1 # uncomment to produce a large log file\n"&quot;. "<br /> &quot;\n"&quot;;<br /><br /><br />sub printTopScriptInvariableContentEnd<br />{ <br /> return ""&quot;&quot;. "<br /> &quot;SET ATOM_FILES_DIRECTORY = './' # directories for input atom files\n"&quot;. "<br /> &quot;SET STARTING_MODEL= 1 # index of the first model\n"&quot;. "<br /> &quot;SET ENDING_MODEL = 1 # index of the last model\n"&quot;. " <br /> &quot; # (determines how many models to calculate)\n"&quot;. "<br /> &quot;CALL ROUTINE = 'model' # do homology modelling\n"&quot;. "<br /> &quot;\n"&quot;;<br />} <br /><br /></fi></fi>&lt;/pre>
Anonymous user

Navigation menu