|
// +----------------------------------------------------------------------+
//
// $Id$
//
/**
* A self-contained class to parse a PDB file into an array of residues
* each containing an array of atoms
*
* Useful when dealing with big PDB files, where using the Science_Chemistry_PDBFile
* class will generate out of memory errors.
*
* @author Jesus M. Castagnetto
* @version 1.0
* @access public
* @package Science_Chemistry
* @see Science_Chemistry_PDBFile
*/
class Science_Chemistry_PDBParser {
/**
* PDB ID
*
* @var string
* @access private
*/
var $pdb;
/**
* Full path to PDB file
*
* @var string
* @access private
*/
var $file;
/**
* PDB file's date
*
* @var date
* @access private
*/
var $date;
/**
* PDB macromolecule(s) class
*
* @var string
* @access private
*/
var $class;
/**
* Array of macromolecules
*
* @var array
* @access private
*/
var $macromolecules;
/**
* Number of macromolecules
*
* @var int
* @access private
*/
var $num_macromolecules;
/**
* Constructor for the class, requires a PDB filename
*
* @param string $filename PDB filename
* @param boolean $multi whether to parse all models in a multi-model file
* @param boolean $meta whether to store the PDB file meta information
* @param boolean $full whether to store the full set of fields per atom
* @return object PDBParser
* @access public
* @see parseResidues()
*/
function Science_Chemistry_PDBParser($filename, $multi=false, $meta=false, $full=false) {
if (!file_exists($filename))
return null;
list($pdb,) = explode(".",basename($filename));
$this->pdb = $pdb;
$this->file = realpath($filename);
$arr = file($filename);
// parsing the PDB file
$month = array (
"JAN" => "01", "FEB" => "02", "MAR" => "03",
"APR" => "04", "MAY" => "05", "JUN" => "06",
"JUL" => "07", "AUG" => "08", "SEP" => "09",
"OCT" => "10", "NOV" => "11", "DEC" => "12"
);
$header_re = "/^HEADER[[:space:]]+(([^[:space:]]+ )+)[[:space:]]+";
$header_re .= "([0-9]{2}-[A-Z]{3}-[0-9]{2,4})[[:space:]]+[A-Z0-9]{4}/";
if (preg_match($header_re, $arr[0], $regs)) {
$this->class = trim($regs[1]);
// put date in a more standard format
$tmp = explode("-", $regs[3]);
if ($tmp[2] <= 23)
$year = 2000 + (int)$tmp[2];
else
$year = 1900 + (int)$tmp[2];
$this->date = $year."-".$month[$tmp[1]]."-".$tmp[0];
}
$flag = "nomodel";
$tmparr = array();
for ($i=0; $i < count($arr); $i++) {
if (!trim($arr[$i]))
continue;
$rectype = strtok($arr[$i]," ");
// check if we have multi-model file
if ($rectype == "MODEL")
continue;
// did we get a multi-model file and are parsing the end
// of a model, if so, end parsing altogether
if ($rectype == "ENDMDL") {
if ($multi) {
$this->macromolecules[] = $this->parseResidues($tmparr, $full);
$this->num_macromolecules++; // = count($this->macromolecules);
$tmparr = array();
} else {
break;
}
continue;
}
// accumulate atom records, put the rest into the meta array
if ($rectype == "ATOM" || $rectype == "HETATM")
$tmparr[] = $arr[$i];
elseif ($meta)
$this->meta[$rectype][] = $arr[$i];
}
if (!empty($tmparr)) {
$this->macromolecules[] = $this->parseResidues($tmparr, $full);
$this->num_macromolecules++; // = count($this->macromolecules);
}
}
/**
* Makes the array of residues in the macromolecule
*
* @param array $records
* @param boolean $full whether to store the full set of fields per atom
* @see parseFile()
* @see parseAtom()
*/
function parseResidues($records, $full) {
$curr_res_id = "";
$residues = array();
$res_atoms = array();
for ($i=0; $i< count($records); $i++) {
$atomrec =& $records[$i];
$res_name = trim(substr($atomrec,17,3));
$chain = trim(substr($atomrec,21,1));
$seq_num = (int) trim(substr($atomrec,22,4));
$res_id = $res_name.":".$seq_num.":".$chain;
//if ($i == 0)
// $curr_res_id = $res_id;
if ($res_id == $curr_res_id) {
$res_atoms[] = $atomrec;
if ($i != (count($records) - 1))
continue;
}
if (($res_id != $curr_res_id) ||
($i == (count($records) - 1))
) {
if (!empty($res_atoms)) {
for ($j=0; $j < count($res_atoms); $j++) {
$temp = $this->parseAtom($res_atoms[$j], $full, $atomname);
$residues[$curr_res_id][$atomname] = $temp;
}
}
$curr_res_id = $res_id;
$res_atoms = array($atomrec);
}
}
return $residues;
}
/**
* Parses an atom record into an associative array
*
* @param string $atomrec PDB atom record
* @param boolean $full whether to store the full set of fields per atom
* @see parseResidues()
*/
function parseAtom($atomrec, $full, $atomname) {
$atom = array();
// process PDB atom record
// no error checking, assumes correct and standard record
$atom["RecName"] = trim(substr($atomrec,0,6));
$atom["SerNum"] = (int) trim(substr($atomrec,6,5));
$atom["AtomName"] = trim(substr($atomrec,12,4));
$atomname = $atom["AtomName"];
if ($full) {
$atom["AltLoc"] = trim(substr($atomrec,16,1));
$atom["ResName"] = trim(substr($atomrec,17,3));
$atom["ChainID"] = trim(substr($atomrec,21,1));
$atom["ResSeqNum"] = (int) trim(substr($atomrec,22,4));
$atom["InsCode"] = trim(substr($atomrec,26,1));
$atom["Occupancy"] = (float) trim(substr($atomrec,54,6));
$atom["TempFactor"] = (float) trim(substr($atomrec,60,6));
$atom["SegmentID"] = trim(substr($atomrec,72,4));
$atom["Charge"] = (float)trim(substr($atomrec,78,2));
$atom["Element"] = trim(substr($atomrec,76,2));
}
$atom["X"] = (double) trim(substr($atomrec,30,8));
$atom["Y"] = (double) trim(substr($atomrec,38,8));
$atom["Z"] = (double) trim(substr($atomrec,46,8));
return $atom;
}
/**
* Returns an array of residues with a particular name
* from the indicated macromolecule index
*
* @param integer $macromol Index of the macromolecule in the $macromolecules array
* @param string $resnam Residue name, e.g. HIS, CYS, etc.
* @return array list of residues with the requested name
* @access public
* @see $macromolecules
*/
function getResidueList ($macromol, $resname) {
$mol =& $this->macromolecules[$macromol];
$reslist = array();
if (!$mol)
return $reslist;
foreach($mol as $resid=>$atoms) {
list($curr_res_name,,) = explode(":",$resid);
// echo $curr_res_name."***\n";
if ($curr_res_name == $resname)
$reslist[$resid] = $atoms;
else
continue;
}
return $reslist;
}
} // end of PDBParser
// vim: expandtab: ts=4: sw=4
?>