open($filename)) { print "ERROR opening OWL file: '$filename'\n"; exit(); } // Get the RDF stanza. We pass FALSE as the second parameter to prevent // the object from reading the entire file into memory. $rdf = new OWLStanza($owl, FALSE); // Get the ontology stanza. It will contain the values for the database // name for this ontology. $ontology = new OWLStanza($owl); // Look for the db name in using the 'oboInOwl:default-namespace term. If it's // not present then we'll use the 'about' element to get the namespace. $namespace = $ontology->getChild('oboInOwl:default-namespace'); if ($namespace) { $db_name = $namespace->getValue(); } else { // Insert the database record into Chado using the owl:Ontology stanza of the owl file. $about = $ontology->getAttribute('rdf:about'); // We wrote the regular expression on the rdf.about line to get the database name for any particular Owl file. if (preg_match('/^.*\/(.*?)\..*$/', $about, $matches)) { $db_name = strtoupper($matches[1]); } } // // Step 1: Make sure that all dependencies (database names) are met for each Owl ontology file. // // loop through each stanza, one at a time, and handle each one // based on the tag name. $stanza = new OWLStanza($owl); // Set an empty array for the dependencies to go in. $deps = array ( 'db' => array (), 'dbxref' => array () ); // Start looping and parsing through the owl:Class stanza section of the Owl file. while (!$stanza->isFinished()) { // Use the tag name from OWLStanza.inc to identify which function should be called. switch ($stanza->getTagName()) { case 'owl:Class': tripal_owl_check_class_depedencies($stanza, $vocab_db_name, $deps); break; } // Get to the next stanza in the OWL file. $stanza = new OWLStanza($owl); } if (count(array_keys($deps['db'])) > 0 or count(array_keys($deps['dbxref'])) > 0) { // We have unmet dependencies. Print those out and return. // The deps array will have DB’s, then terms' . "\n"); if (count($deps['db']) > 0) { drupal_set_message('Cannot import ontology, "' . $db_name . '", as the following ' . 'dependent vocabularies must first be imported: ' . print_r(array_keys($deps['db']), TRUE) . '\n', 'error'); } if (count($deps['dbxref']) > 0) { drupal_set_message('Cannot import ontology, "' . $db_name . '", as the following ' . 'dependent terms must first be imported: ' . print_r(array_keys($deps['dbxref']), TRUE) . '\n', 'error'); } return; } // // Step 2: If we pass the dependency check in step 1 then we can insert // the terms. // // Holds an array of CV and DB records that have already been // inserted (reduces number of queires). $vocabs = array ( 'db' => array (), 'cv' => array (), 'this' => array () ); // Reload the ontology to reposition at the beginning of the OWl file for inserting the // new terms into Chado. $owl = new XMLReader(); if (!$owl->open($filename)) { print "ERROR opening OWL file: '$filename'\n"; exit(); } $rdf = new OWLStanza($owl, FALSE); $ontology = new OWLStanza($owl); // Insert the database record into Chado using the // owl:Ontology stanza. $url = ''; $homepage = $ontology->getChild('foaf:homepage'); if ($homepage) { $url = $homepage->getValue(); } $db = array ( 'url' => $url, 'name' => $db_name ); // Using the Tripal API function to insert the term into the Chado database. $db = chado_insert_db($db); // Get the description for this vocabulary. This should be in the // dc:description element. If that element is missing then the // description should default to the empty string. $cv_description = ''; $description = $ontology->getChild('dc:description'); if ($description) { $cv_description = $description->getValue(); } // Get the name for the CV. This should be in the 'dc:title' element. If the // title is not present then the cv name should default to the database name. $cv_name == $namespace = $ontology->getChild('oboInOwl:default-namespace'); if ($namespace) { $cv_name = $namespace->getValue(); } $title = $ontology->getChild('dc:title'); if ($title) { $cv_name = preg_replace("/[^\w]/", "_", strtolower($title->getValue())); } // Insert the CV record into Chado. $cv = chado_insert_cv($cv_name, $cv_description); // Add this CV and DB to our vocabs array so we can reuse it later. $vocabs[$db_name]['cv'] = $namespace_cv; $vocabs[$db_name]['db'] = $db; $vocabs['this'] = $db_name; // loop through each stanza of the owl file, one at a time, and handle each one // based on the tag name from the OWLStanza.inc file. $stanza = new OWLStanza($owl); while (!$stanza->isFinished()) { // Use the tag name to identify which function should be called. switch ($stanza->getTagName()) { case 'owl:AnnotationProperty': // tripal_owl_handle_annotation_property($stanza, $vocabs); break; case 'rdf:Description': // tripal_owl_handle_description($stanza, $vocabs); break; case 'owl:ObjectProperty': // tripal_owl_handle_object_property($stanza, $vocabs); break; case 'owl:Class': tripal_owl_handle_class($stanza, $vocabs); break; case 'owl:Axiom': break; case 'owl:Restriction': break; default: throw new Exception("Unhandled stanza: " . $stanza->getTagName()); exit(); break; } // Get the next stanza in the OWL file. $stanza = new OWLStanza($owl); } // Close the XMLReader $owl object. $owl->close(); } /** * Checks for required vocabularies that are not loaded into Chado. * * Some vocabularies use terms from other ontologies. If this is happens * we need to ensure that the dependent vocabularies are present in the * database prior to loading this one. This function adds to the $deps * array all of the database names and term accessions that are missing in * Chado. * * @param $stanza The * OWLStanza object for the current stanza from the OWL file. * @param $vocab_db_name The * name of the database for the vocabulary being loded. * @param $deps The * dependencies array. The missing databases are provided in array * using a 'db' key, and missing terms are in a second array using a * 'dbxref' key. */ function tripal_owl_check_class_depedencies(OWLStanza $stanza, $vocab_db_name, &$deps) { // Initialize the variables. $db_name = ''; $accession = ''; $db = null; // Get the DB name and accession from the "rdf:about" attribute. $about = $stanza->getAttribute('rdf:about'); if (!$about) { // TODO: some owl:Class stanzas do not have an about. What are these? // how should we handle them. return; } // We wrote the regular expression on the rdf.about line to get the database // name and accession term for any particular Owl file. if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) { $db_name = strtoupper($matches[1]); $accession = $matches[2]; } else { throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML()); } // If the database name for this term is the same as the vocabulary // we are trying to load, then don't include it in the $deps array. if ($db_name !== $vocab_db_name) { return; } // Check if the db_name does not exist in the chado.db table. If it // does not exist then add it to our $deps array. If the query fails then // throw an exception. $db = chado_select_record('db', array ( 'db_id' ), array ( 'name' => $db_name )); if ($db === FALSE) { throw new Exception("Failed to execute query to find vocabulary in chado.db table\n\n" . $stanza->getXML()); } else if (count($db) == 0) { $deps['db'][$db_name] = TRUE; // Does this stanza provide the URL for the OWL file of this missing // dependency. If so then add it to our deps array. $imported_from = $stanza->getChild('obo:IAO_0000412'); if ($imported_from == NULL) { return; } $url = $imported_from->getAttribute('rdf:resource'); if ($url) { $deps['db'][$db_name] = $url; } return; } // If the db_name exists, then check if the accession exists in // the chado.dbxref table. If it doesn't exist then add an entry to the // $deps array. If the query fails then throw an exception. $values = array ( 'db_id' => $db[0]->db_id, 'accession' => $accession ); $dbxref = chado_select_record('dbxref', array ( 'dbxref_id', 'db_id' ), $values); if ($dbxref === FALSE) { throw new Exception("Failed to execute query to find vocabulary term in chado.dbxref table\n\n" . $stanza->getXML()); } elseif (count($accession) == 0) { $deps['dbxref'][$db_name . ':' . $accesson] = TRUE; } return; } /** * * @param * $stanza * @param * $vocabs * @throws Exception */ function tripal_owl_handle_object_property($stanza, $vocabs) { } /** * * @param * $stanza * @param * $vocabs * @throws Exception */ function tripal_owl_handle_annotation_property($stanza, $vocabs) { // $matches = array(); // $db_name = ''; // $accession = ''; // $about = $stanza->getAttribute('rdf:about'); // // Get the DB name and accession from the about attribute using the preg match function. // if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) { // $db_name = ($matches[1]); // $accession = $matches[2]; // } // else { // throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML()); // } // // Insert a DB Record // if (array_key_exists($db_name, $vocabs)) { // $db = $vocabs[$db_name]['db']; // $default_namespace_cv = $vocabs[$db_name]['cv']; // } // else { // // Unfortunately, all we have is the name. The OWL format // // doesn't provides us the URL, description, etc. // $values = array( // 'name' => $db_name // ); // $db = chado_insert_db($values); // // Insert a dbxref record. // $values = array( // 'db_id' => $db->db_id, // 'accession' => $accession // ); // $dbxref = chado_insert_dbxref($values); // $imported_from = $stanza->getChild('obo:IAO_0000114'); // if ($imported_from == NULL) { // return; // } // $url = $imported_from->getAttribute('rdf:resource'); // if ($url) { // $vocabs['db'][$db_name] = $url; // } // return; // } // // Insert a new cvterm record. // $cvterm_name = ''; // $definition = ''; // $cvterm_name = $stanza->getChild('rdfs:label'); // if ($cvterm_name) { // $cvterm_name = $stanza->getValue(); // } // $definition = $stanza->getChild('obo:IAO_0000115'); // if ($definition) { // $definition = $stanza->getValue(); // } // $term = array( // 'id' => $db->name .':'. $dbxref->accession, // 'name' => $cvterm_name, // 'cv_name' => $cv->name, // 'definition' => $definition, // ); // $option =array(); // if ($vocabs['this'] != $db->name){ // $option['update_existing'] = FALSE; // } // $cvterm = chado_insert_cvterm($term, $option); // } } /** * * @param * $stanza * @param * $vocabs * @throws Exception */ function tripal_owl_handle_description($stanza, $vocabs) { } /** * * The function goes through owl:Class stanza to insert new vocabularies. * * @param $stanza The * OWLStanza object for the current stanza from the OWL file. * @param * $vocabs * * @throws Exception */ function tripal_owl_handle_class(OWLStanza $stanza, $vocabs) { // Initialize the database and cv variables. $db_name = $vocabs['this']; $accession = ''; $is_a = ''; $namespace_cv = $vocabs[$db_name]['cv']; $db = $vocabs[$db_name]['db']; // Insert the dbxref record into Chado using the owl:Class stanza of the owl file. // Any oboInOwl:id supercedes what we find in the rdf:about in the owl file. $obo_id = $stanza->getChild('oboInOwl:id'); if ($obo_id) { if (preg_match('/.*>(.+):(.+)<.*/', $about, $matches)) { $db_name = strtoupper($matches[1]); $accession = $matches[2]; } else { $about = $stanza->getAttribute('rdf:about'); // We wrote the regular expression on the rdf.about line to get the // db_name and accession for any particular Owl file. if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) { $db_name = strtoupper($matches[1]); $accession = $matches[2]; } else { throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML()); } } } // If the database name for this term is the same as the vocabulary // we are trying to load, then do include it in the $vocabs array. if ($db_name == $vocabs['this']) { return; } // insert dbxref $values = array ( 'db_id' => $db->db_id, 'accession' => $accession ); $dbxref = chado_insert_dbxref($values); $cvterm_name = $stanza->getChild('rdfs:label'); if ($cvterm_name) { $cvterm_name = $stanza->getValue(); } $definition = $stanza->getChild('obo:IAO_0000115'); if ($definition) { $definition = $stanza->getValue(); } $term = array ( 'id' => $db->name . ':' . $dbxref->accession, 'name' => $db->name, 'cv_name' => $stanza->getValue(), 'definition' => $stanza->getValue(), ); $options = array (); if ($vocabs['this'] != $db->name) { $options['update_existing'] = FALSE; } $cvterm = chado_insert_cvterm($term, $options); // // Add a record to the chado relationship table if an ‘rdfs:subClassOf’ child exists. // $cvterm_name = $stanza->getChild('rdfs:subClassOf'); // Insert a new cvterm record. // $cvterm_name = ''; // $definition = ''; // $cvterm_name = $stanza->getChild('rdfs:label'); // if ($cvterm_name) { // $cvterm_name = $stanza->getValue(); //} // $definition = $stanza->getChild('obo:IAO_0000115'); // if ($definition) { //$definition = $stanza->getValue(); //} // $term = array ( // 'id' => $db->name . ':' . $dbxref->accession, // 'name' => $cvterm_name, // 'cv_name' => $cv->name, // 'definition' => $definition // ); // $option = array (); // if ($vocabs['this'] != $db->name) { // $option['update_existing'] = FALSE; // } // $cvterm = chado_insert_cvterm($term, $option); }