|
@@ -0,0 +1,513 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+// This Tripal_cv.owl_loader.php and OWLStanza.inc(CLASS) file is being developed to read and parse through any scientific
|
|
|
+// ontology XML Owl file to have the vocabularies be inserted into the Chado database for the open source project
|
|
|
+// Tripal.info to be used.
|
|
|
+
|
|
|
+/**
|
|
|
+ * @file
|
|
|
+ * @add file from header
|
|
|
+ */
|
|
|
+require_once ('OWLStanza.inc');
|
|
|
+/**
|
|
|
+ * Parses an OWL XML file and imports the CV terms into Chado
|
|
|
+ *
|
|
|
+ * @param $filename The
|
|
|
+ * full path to the OWL XML file.
|
|
|
+ *
|
|
|
+ * @return No return value.
|
|
|
+ *
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+function tripal_cv_parse_owl($filename) {
|
|
|
+ // TODO: need to pass in the $db_name as the cv name into the function when you see the
|
|
|
+ // oboInOwl:default-namespace that is not the same as $db_name.
|
|
|
+ //
|
|
|
+ // TODO: this all should occur inside of a transaction.
|
|
|
+
|
|
|
+ // Opening the OWL file for parsing.
|
|
|
+ $owl = new XMLReader();
|
|
|
+ // This command will open and read the any Owl file.
|
|
|
+ if (!$owl->open($filename)) {
|
|
|
+ print "ERROR opening OWL file: '$filename'\n";
|
|
|
+ exit();
|
|
|
+ }
|
|
|
+
|
|
|
+ // Get the RDF stanza. We pass FALSE as the second parameter to prevent
|
|
|
+ // the object from reading the entire file into memory.
|
|
|
+ $rdf = new OWLStanza($owl, FALSE);
|
|
|
+
|
|
|
+ // Get the ontology stanza. It will contain the values for the database
|
|
|
+ // name for this ontology.
|
|
|
+ $ontology = new OWLStanza($owl);
|
|
|
+
|
|
|
+ // Look for the db name in using the 'oboInOwl:default-namespace term. If it's
|
|
|
+ // not present then we'll use the 'about' element to get the namespace.
|
|
|
+ $namespace = $ontology->getChild('oboInOwl:default-namespace');
|
|
|
+ if ($namespace) {
|
|
|
+ $db_name = $namespace->getValue();
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // Insert the database record into Chado using the owl:Ontology stanza of the owl file.
|
|
|
+ $about = $ontology->getAttribute('rdf:about');
|
|
|
+
|
|
|
+ // We wrote the regular expression on the rdf.about line to get the database name for any particular Owl file.
|
|
|
+ if (preg_match('/^.*\/(.*?)\..*$/', $about, $matches)) {
|
|
|
+ $db_name = strtoupper($matches[1]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //
|
|
|
+ // Step 1: Make sure that all dependencies (database names) are met for each Owl ontology file.
|
|
|
+ //
|
|
|
+
|
|
|
+ // loop through each stanza, one at a time, and handle each one
|
|
|
+ // based on the tag name.
|
|
|
+ $stanza = new OWLStanza($owl);
|
|
|
+
|
|
|
+ // Set an empty array for the dependencies to go in.
|
|
|
+ $deps = array (
|
|
|
+ 'db' => array (),
|
|
|
+ 'dbxref' => array ()
|
|
|
+ );
|
|
|
+
|
|
|
+ // Start looping and parsing through the owl:Class stanza section of the Owl file.
|
|
|
+ while (!$stanza->isFinished()) {
|
|
|
+ // Use the tag name from OWLStanza.inc to identify which function should be called.
|
|
|
+ switch ($stanza->getTagName()) {
|
|
|
+ case 'owl:Class':
|
|
|
+ tripal_owl_check_class_depedencies($stanza, $vocab_db_name, $deps);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ // Get to the next stanza in the OWL file.
|
|
|
+ $stanza = new OWLStanza($owl);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (count(array_keys($deps['db'])) > 0 or count(array_keys($deps['dbxref'])) > 0) {
|
|
|
+ // We have unmet dependencies. Print those out and return.
|
|
|
+ // The deps array will have DB’s, then terms' . "\n");
|
|
|
+ if (count($deps['db']) > 0) {
|
|
|
+ drupal_set_message('Cannot import ontology, "' . $db_name . '", as the following ' . 'dependent vocabularies must first be imported: ' . print_r(array_keys($deps['db']), TRUE) . '\n', 'error');
|
|
|
+ }
|
|
|
+ if (count($deps['dbxref']) > 0) {
|
|
|
+ drupal_set_message('Cannot import ontology, "' . $db_name . '", as the following ' . 'dependent terms must first be imported: ' . print_r(array_keys($deps['dbxref']), TRUE) . '\n', 'error');
|
|
|
+ }
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ //
|
|
|
+ // Step 2: If we pass the dependency check in step 1 then we can insert
|
|
|
+ // the terms.
|
|
|
+ //
|
|
|
+
|
|
|
+ // Holds an array of CV and DB records that have already been
|
|
|
+ // inserted (reduces number of queires).
|
|
|
+ $vocabs = array (
|
|
|
+ 'db' => array (),
|
|
|
+ 'cv' => array (),
|
|
|
+ 'this' => array ()
|
|
|
+ );
|
|
|
+
|
|
|
+ // Reload the ontology to reposition at the beginning of the OWl file for inserting the
|
|
|
+ // new terms into Chado.
|
|
|
+
|
|
|
+ $owl = new XMLReader();
|
|
|
+ if (!$owl->open($filename)) {
|
|
|
+ print "ERROR opening OWL file: '$filename'\n";
|
|
|
+ exit();
|
|
|
+ }
|
|
|
+ $rdf = new OWLStanza($owl, FALSE);
|
|
|
+ $ontology = new OWLStanza($owl);
|
|
|
+
|
|
|
+ // Insert the database record into Chado using the
|
|
|
+ // owl:Ontology stanza.
|
|
|
+ $url = '';
|
|
|
+ $homepage = $ontology->getChild('foaf:homepage');
|
|
|
+ if ($homepage) {
|
|
|
+ $url = $homepage->getValue();
|
|
|
+ }
|
|
|
+ $db = array (
|
|
|
+ 'url' => $url,
|
|
|
+ 'name' => $db_name
|
|
|
+ );
|
|
|
+
|
|
|
+ // Using the Tripal API function to insert the term into the Chado database.
|
|
|
+ $db = tripal_insert_db($db);
|
|
|
+
|
|
|
+ // Get the description for this vocabulary. This should be in the
|
|
|
+ // dc:description element. If that element is missing then the
|
|
|
+ // description should default to the empty string.
|
|
|
+ $cv_description = '';
|
|
|
+ $description = $ontology->getChild('dc:description');
|
|
|
+ if ($description) {
|
|
|
+ $cv_description = $description->getValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ // Get the name for the CV. This should be in the 'dc:title' element. If the
|
|
|
+ // title is not present then the cv name should default to the database name.
|
|
|
+ $cv_name == $namespace = $ontology->getChild('oboInOwl:default-namespace');
|
|
|
+ if ($namespace) {
|
|
|
+ $cv_name = $namespace->getValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ $title = $ontology->getChild('dc:title');
|
|
|
+ if ($title) {
|
|
|
+ $cv_name = preg_replace("/[^\w]/", "_", strtolower($title->getValue()));
|
|
|
+ }
|
|
|
+
|
|
|
+ // Insert the CV record into Chado.
|
|
|
+ $cv = tripal_insert_cv($cv_name, $cv_description);
|
|
|
+
|
|
|
+ // Add this CV and DB to our vocabs array so we can reuse it later.
|
|
|
+ $vocabs[$db_name]['cv'] = $namespace_cv;
|
|
|
+ $vocabs[$db_name]['db'] = $db;
|
|
|
+ $vocabs['this'] = $db_name;
|
|
|
+
|
|
|
+ // loop through each stanza of the owl file, one at a time, and handle each one
|
|
|
+ // based on the tag name from the OWLStanza.inc file.
|
|
|
+ $stanza = new OWLStanza($owl);
|
|
|
+ while (!$stanza->isFinished()) {
|
|
|
+
|
|
|
+ // Use the tag name to identify which function should be called.
|
|
|
+ switch ($stanza->getTagName()) {
|
|
|
+ case 'owl:AnnotationProperty':
|
|
|
+ // tripal_owl_handle_annotation_property($stanza, $vocabs);
|
|
|
+ break;
|
|
|
+ case 'rdf:Description':
|
|
|
+ // tripal_owl_handle_description($stanza, $vocabs);
|
|
|
+ break;
|
|
|
+ case 'owl:ObjectProperty':
|
|
|
+ // tripal_owl_handle_object_property($stanza, $vocabs);
|
|
|
+ break;
|
|
|
+ case 'owl:Class':
|
|
|
+ tripal_owl_handle_class($stanza, $vocabs);
|
|
|
+ break;
|
|
|
+ case 'owl:Axiom':
|
|
|
+ break;
|
|
|
+ case 'owl:Restriction':
|
|
|
+ break;
|
|
|
+ default:
|
|
|
+ throw new Exception("Unhandled stanza: " . $stanza->getTagName());
|
|
|
+ exit();
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Get the next stanza in the OWL file.
|
|
|
+ $stanza = new OWLStanza($owl);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Close the XMLReader $owl object.
|
|
|
+ $owl->close();
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Checks for required vocabularies that are not loaded into Chado.
|
|
|
+ *
|
|
|
+ * Some vocabularies use terms from other ontologies. If this is happens
|
|
|
+ * we need to ensure that the dependent vocabularies are present in the
|
|
|
+ * database prior to loading this one. This function adds to the $deps
|
|
|
+ * array all of the database names and term accessions that are missing in
|
|
|
+ * Chado.
|
|
|
+ *
|
|
|
+ * @param $stanza The
|
|
|
+ * OWLStanza object for the current stanza from the OWL file.
|
|
|
+ * @param $vocab_db_name The
|
|
|
+ * name of the database for the vocabulary being loded.
|
|
|
+ * @param $deps The
|
|
|
+ * dependencies array. The missing databases are provided in array
|
|
|
+ * using a 'db' key, and missing terms are in a second array using a
|
|
|
+ * 'dbxref' key.
|
|
|
+ */
|
|
|
+function tripal_owl_check_class_depedencies(OWLStanza $stanza, $vocab_db_name, &$deps) {
|
|
|
+
|
|
|
+ // Initialize the variables.
|
|
|
+ $db_name = '';
|
|
|
+ $accession = '';
|
|
|
+ $db = null;
|
|
|
+
|
|
|
+ // Get the DB name and accession from the "rdf:about" attribute.
|
|
|
+ $about = $stanza->getAttribute('rdf:about');
|
|
|
+ if (!$about) {
|
|
|
+ // TODO: some owl:Class stanzas do not have an about. What are these?
|
|
|
+ // how should we handle them.
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // We wrote the regular expression on the rdf.about line to get the database
|
|
|
+ // name and accession term for any particular Owl file.
|
|
|
+ if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) {
|
|
|
+ $db_name = strtoupper($matches[1]);
|
|
|
+ $accession = $matches[2];
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML());
|
|
|
+ }
|
|
|
+
|
|
|
+ // If the database name for this term is the same as the vocabulary
|
|
|
+ // we are trying to load, then don't include it in the $deps array.
|
|
|
+ if ($db_name !== $vocab_db_name) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Check if the db_name does not exist in the chado.db table. If it
|
|
|
+ // does not exist then add it to our $deps array. If the query fails then
|
|
|
+ // throw an exception.
|
|
|
+ $db = chado_select_record('db', array (
|
|
|
+ 'db_id'
|
|
|
+ ), array (
|
|
|
+ 'name' => $db_name
|
|
|
+ ));
|
|
|
+ if ($db === FALSE) {
|
|
|
+ throw new Exception("Failed to execute query to find vocabulary in chado.db table\n\n" . $stanza->getXML());
|
|
|
+ }
|
|
|
+ else if (count($db) == 0) {
|
|
|
+ $deps['db'][$db_name] = TRUE;
|
|
|
+
|
|
|
+ // Does this stanza provide the URL for the OWL file of this missing
|
|
|
+ // dependency. If so then add it to our deps array.
|
|
|
+ $imported_from = $stanza->getChild('obo:IAO_0000412');
|
|
|
+
|
|
|
+ if ($imported_from == NULL) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ $url = $imported_from->getAttribute('rdf:resource');
|
|
|
+ if ($url) {
|
|
|
+ $deps['db'][$db_name] = $url;
|
|
|
+ }
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If the db_name exists, then check if the accession exists in
|
|
|
+ // the chado.dbxref table. If it doesn't exist then add an entry to the
|
|
|
+ // $deps array. If the query fails then throw an exception.
|
|
|
+ $values = array (
|
|
|
+ 'db_id' => $db[0]->db_id,
|
|
|
+ 'accession' => $accession
|
|
|
+ );
|
|
|
+
|
|
|
+ $dbxref = chado_select_record('dbxref', array (
|
|
|
+ 'dbxref_id',
|
|
|
+ 'db_id'
|
|
|
+ ), $values);
|
|
|
+ if ($dbxref === FALSE) {
|
|
|
+ throw new Exception("Failed to execute query to find vocabulary term in chado.dbxref table\n\n" . $stanza->getXML());
|
|
|
+ }
|
|
|
+ elseif (count($accession) == 0) {
|
|
|
+ $deps['dbxref'][$db_name . ':' . $accesson] = TRUE;
|
|
|
+ }
|
|
|
+ return;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ *
|
|
|
+ * @param
|
|
|
+ * $stanza
|
|
|
+ * @param
|
|
|
+ * $vocabs
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+function tripal_owl_handle_object_property($stanza, $vocabs) {
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ *
|
|
|
+ * @param
|
|
|
+ * $stanza
|
|
|
+ * @param
|
|
|
+ * $vocabs
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+function tripal_owl_handle_annotation_property($stanza, $vocabs) {
|
|
|
+
|
|
|
+// $matches = array();
|
|
|
+// $db_name = '';
|
|
|
+// $accession = '';
|
|
|
+// $about = $stanza->getAttribute('rdf:about');
|
|
|
+// // Get the DB name and accession from the about attribute using the preg match function.
|
|
|
+// if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) {
|
|
|
+// $db_name = ($matches[1]);
|
|
|
+// $accession = $matches[2];
|
|
|
+// }
|
|
|
+// else {
|
|
|
+// throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML());
|
|
|
+// }
|
|
|
+
|
|
|
+// // Insert a DB Record
|
|
|
+// if (array_key_exists($db_name, $vocabs)) {
|
|
|
+// $db = $vocabs[$db_name]['db'];
|
|
|
+// $default_namespace_cv = $vocabs[$db_name]['cv'];
|
|
|
+// }
|
|
|
+// else {
|
|
|
+// // Unfortunately, all we have is the name. The OWL format
|
|
|
+// // doesn't provides us the URL, description, etc.
|
|
|
+// $values = array(
|
|
|
+// 'name' => $db_name
|
|
|
+// );
|
|
|
+// $db = tripal_insert_db($values);
|
|
|
+// // Insert a dbxref record.
|
|
|
+// $values = array(
|
|
|
+// 'db_id' => $db->db_id,
|
|
|
+// 'accession' => $accession
|
|
|
+// );
|
|
|
+// $dbxref = tripal_insert_dbxref($values);
|
|
|
+
|
|
|
+// $imported_from = $stanza->getChild('obo:IAO_0000114');
|
|
|
+// if ($imported_from == NULL) {
|
|
|
+// return;
|
|
|
+// }
|
|
|
+// $url = $imported_from->getAttribute('rdf:resource');
|
|
|
+// if ($url) {
|
|
|
+// $vocabs['db'][$db_name] = $url;
|
|
|
+// }
|
|
|
+// return;
|
|
|
+// }
|
|
|
+
|
|
|
+// // Insert a new cvterm record.
|
|
|
+// $cvterm_name = '';
|
|
|
+// $definition = '';
|
|
|
+
|
|
|
+// $cvterm_name = $stanza->getChild('rdfs:label');
|
|
|
+// if ($cvterm_name) {
|
|
|
+// $cvterm_name = $stanza->getValue();
|
|
|
+// }
|
|
|
+// $definition = $stanza->getChild('obo:IAO_0000115');
|
|
|
+// if ($definition) {
|
|
|
+// $definition = $stanza->getValue();
|
|
|
+// }
|
|
|
+
|
|
|
+// $term = array(
|
|
|
+// 'id' => $db->name .':'. $dbxref->accession,
|
|
|
+// 'name' => $cvterm_name,
|
|
|
+// 'cv_name' => $cv->name,
|
|
|
+// 'definition' => $definition,
|
|
|
+// );
|
|
|
+// $option =array();
|
|
|
+// if ($vocabs['this'] != $db->name){
|
|
|
+// $option['update_existing'] = FALSE;
|
|
|
+// }
|
|
|
+// $cvterm = tripal_insert_cvterm($term, $option);
|
|
|
+// }
|
|
|
+}
|
|
|
+/**
|
|
|
+ *
|
|
|
+ * @param
|
|
|
+ * $stanza
|
|
|
+ * @param
|
|
|
+ * $vocabs
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+function tripal_owl_handle_description($stanza, $vocabs) {
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ *
|
|
|
+ * The function goes through owl:Class stanza to insert new vocabularies.
|
|
|
+ *
|
|
|
+ * @param $stanza The
|
|
|
+ * OWLStanza object for the current stanza from the OWL file.
|
|
|
+ * @param
|
|
|
+ * $vocabs
|
|
|
+ *
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+function tripal_owl_handle_class(OWLStanza $stanza, $vocabs) {
|
|
|
+
|
|
|
+ // Initialize the database and cv variables.
|
|
|
+ $db_name = $vocabs['this'];
|
|
|
+ $accession = '';
|
|
|
+ $is_a = '';
|
|
|
+ $namespace_cv = $vocabs[$db_name]['cv'];
|
|
|
+ $db = $vocabs[$db_name]['db'];
|
|
|
+
|
|
|
+ // Insert the dbxref record into Chado using the owl:Class stanza of the owl file.
|
|
|
+ // Any oboInOwl:id supercedes what we find in the rdf:about in the owl file.
|
|
|
+ $obo_id = $stanza->getChild('oboInOwl:id');
|
|
|
+
|
|
|
+ if ($obo_id) {
|
|
|
+ if (preg_match('/.*>(.+):(.+)<.*/', $about, $matches)) {
|
|
|
+ $db_name = strtoupper($matches[1]);
|
|
|
+ $accession = $matches[2];
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $about = $stanza->getAttribute('rdf:about');
|
|
|
+ // We wrote the regular expression on the rdf.about line to get the
|
|
|
+ // db_name and accession for any particular Owl file.
|
|
|
+ if (preg_match('/.*\/(.+)_(.+)/', $about, $matches)) {
|
|
|
+ $db_name = strtoupper($matches[1]);
|
|
|
+ $accession = $matches[2];
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ throw new Exception("owl:Class stanza 'rdf:about' attribute is not formated as expected: '$about'. " . "This is necessary to determine the term's accession: \n\n" . $stanza->getXML());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // If the database name for this term is the same as the vocabulary
|
|
|
+ // we are trying to load, then do include it in the $vocabs array.
|
|
|
+ if ($db_name == $vocabs['this']) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // insert dbxref
|
|
|
+ $values = array (
|
|
|
+ 'db_id' => $db->db_id,
|
|
|
+ 'accession' => $accession
|
|
|
+ );
|
|
|
+ print_r($values);
|
|
|
+ $dbxref = tripal_insert_dbxref($values);
|
|
|
+
|
|
|
+ $cvterm_name = $stanza->getChild('rdfs:label');
|
|
|
+ if ($cvterm_name) {
|
|
|
+ $cvterm_name = $stanza->getValue();
|
|
|
+ }
|
|
|
+ print_r($cvterm_name);
|
|
|
+
|
|
|
+ $definition = $stanza->getChild('obo:IAO_0000115');
|
|
|
+ if ($definition) {
|
|
|
+ $definition = $stanza->getValue();
|
|
|
+ }
|
|
|
+
|
|
|
+ $term = array (
|
|
|
+ 'id' => $db->name . ':' . $dbxref->accession,
|
|
|
+ 'name' => $db->name,
|
|
|
+ 'cv_name' => $stanza->getValue(),
|
|
|
+ 'definition' => $stanza->getValue(),
|
|
|
+ );
|
|
|
+ print_r($term);
|
|
|
+
|
|
|
+ $options = array ();
|
|
|
+ if ($vocabs['this'] != $db->name) {
|
|
|
+ $options['update_existing'] = FALSE;
|
|
|
+ }
|
|
|
+ $cvterm = tripal_insert_cvterm($term, $options);
|
|
|
+
|
|
|
+ // // Add a record to the chado relationship table if an ‘rdfs:subClassOf’ child exists.
|
|
|
+
|
|
|
+ // $cvterm_name = $stanza->getChild('rdfs:subClassOf');
|
|
|
+
|
|
|
+ // Insert a new cvterm record.
|
|
|
+ // $cvterm_name = '';
|
|
|
+ // $definition = '';
|
|
|
+ // $cvterm_name = $stanza->getChild('rdfs:label');
|
|
|
+ // if ($cvterm_name) {
|
|
|
+ // $cvterm_name = $stanza->getValue();
|
|
|
+ //}
|
|
|
+
|
|
|
+ // $definition = $stanza->getChild('obo:IAO_0000115');
|
|
|
+ // if ($definition) {
|
|
|
+ //$definition = $stanza->getValue();
|
|
|
+ //}
|
|
|
+
|
|
|
+
|
|
|
+ // $term = array (
|
|
|
+ // 'id' => $db->name . ':' . $dbxref->accession,
|
|
|
+ // 'name' => $cvterm_name,
|
|
|
+ // 'cv_name' => $cv->name,
|
|
|
+ // 'definition' => $definition
|
|
|
+ // );
|
|
|
+ // $option = array ();
|
|
|
+ // if ($vocabs['this'] != $db->name) {
|
|
|
+ // $option['update_existing'] = FALSE;
|
|
|
+ // }
|
|
|
+ // $cvterm = tripal_insert_cvterm($term, $option);
|
|
|
+ }
|