Quellcode durchsuchen

Merge pull request #1117 from tripal/1116-tv3-edam_import

Fixes to load EDAM ontology.
Lacey-Anne Sanderson vor 4 Jahren
Ursprung
Commit
e2b5386bd0
1 geänderte Dateien mit 54 neuen und 8 gelöschten Zeilen
  1. 54 8
      tripal_chado/includes/TripalImporter/OBOImporter.inc

+ 54 - 8
tripal_chado/includes/TripalImporter/OBOImporter.inc

@@ -160,6 +160,15 @@ class OBOImporter extends TripalImporter {
    */
   private $default_namespace = '';
 
+
+  /**
+   * Holds the idspace elements from the header. These will correspond
+   * to the accession prefixes, or short names (e.g. GO) for the terms. For
+   * example, the EDAM vocabulary has several id spaces:
+   * format, data, operation and topic.
+   */
+  private $idspaces = [];
+
   /**
    * The default database prefix for this ontology.
    *
@@ -841,6 +850,7 @@ class OBOImporter extends TripalImporter {
   private function setDefaults($header) {
     $short_name = '';
     $namespace = '';
+    $idspaces = [];
 
     // Get the 'ontology' and 'default-namespace' headers.  Unfortunately,
     // not all OBO files contain these.
@@ -850,7 +860,19 @@ class OBOImporter extends TripalImporter {
     if (array_key_exists('default-namespace', $header)) {
       $namespace = $header['default-namespace'][0];
     }
-
+    if (array_key_exists('idspace', $header)) {
+      $matches = [];
+      foreach ($header['idspace'] as $idspace) {
+        if (preg_match('/^(.+?)\s+(.+?)\s+"(.+)"$/', $idspace, $matches)) {
+          $idspaces[$matches[1]]['url'] = $matches[2];
+          $idspaces[$matches[1]]['description'] = $matches[3];
+        }
+        elseif (preg_match('/^(.+?)\s+(.+?)$/', $idspace, $matches)) {
+          $idspaces[$matches[1]]['url'] = $matches[2];
+          $idspaces[$matches[1]]['description'] = '';
+        }
+      }
+    }
     // The OBO specification allows the 'ontology' header tag to be nested for
     // subsets (e.g. go/subsets/goslim_plant).  We need to simplify that down
     // to the top-level item.
@@ -901,7 +923,7 @@ class OBOImporter extends TripalImporter {
 
     // If we can't find the namespace or the short_name then bust.
     if (!$namespace and !$short_name) {
-      throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" and "ontology" headers.');
+      throw new ErrorException('Cannot determine the namespace or ontology prefix from this OBO file. It is missing both the "default-namespace" or a compatible "ontology" header.');
     }
 
     // Set the defaults.
@@ -910,7 +932,12 @@ class OBOImporter extends TripalImporter {
     $this->addDB($this->default_db);
     $cv = $this->addCV($this->default_namespace);
     $this->obo_namespaces[$namespace] = $cv->cv_id;
+    $this->idspaces = $idspaces;
 
+    // Add a new database for each idspace.
+    foreach ($idspaces as $shortname => $idspace) {
+      $this->addDB($shortname, $idspace['url'], $idspace['description']);
+    }
   }
 
   /**
@@ -1026,7 +1053,7 @@ class OBOImporter extends TripalImporter {
       $ontology_results = drupal_json_decode($response->data);
       if ($ontology_results['error']) {
 
-        $this->logMessage('Cannot find the ontology via an EBI OLS lookup: !short_name. \n' .
+        $this->logMessage(t('Cannot find the ontology via an EBI OLS lookup: !short_name. \n' .
           'We tried to access: !url' .
           'EBI Reported: !message. ' .
           'Consider finding the OBO file for this ontology and manually loading it first.',
@@ -1034,7 +1061,7 @@ class OBOImporter extends TripalImporter {
             '!message' => $ontology_results['message'],
             '!short_name' => $short_name,
             '!url' => $full_url,
-          ], TRIPAL_WARNING);
+          ]), TRIPAL_WARNING);
       }
       //What should happen with this stuff?
       $base_iri = $ontology_results['config']['baseUris'][0];
@@ -1784,7 +1811,8 @@ class OBOImporter extends TripalImporter {
       $accession = $matches[2];
 
       // If the term is borrowed then let's try to deal with it.
-      if ($short_name != $this->default_db) {
+      $idspaces = array_keys($this->idspaces);
+      if ($short_name != $this->default_db and !in_array($short_name, $idspaces)) {
 
         // First try to lookup the term and replace the stanza with the updated
         // details.
@@ -2079,6 +2107,11 @@ class OBOImporter extends TripalImporter {
 
           // If this term has a namespace then we want to keep track of it.
           if (array_key_exists('namespace', $stanza)) {
+            // Fix the namespace for EDAM terms so they all use the same
+            // namespace (i.e. cv record).
+            if ($this->default_namespace == 'EDAM') {
+              $stanza['namespace'][0] = 'EDAM';
+            }
             $namespace = $stanza['namespace'][0];
             $cv = $this->all_cvs[$namespace];
             $this->obo_namespaces[$namespace] = $cv->cv_id;
@@ -2104,6 +2137,11 @@ class OBOImporter extends TripalImporter {
         continue;
       }
 
+      // For EDAM, we unfortunately have to hard-code a fix as the
+      // short names of terms are correct.
+      $line = preg_replace('/EDAM_(\w+)/', '\1', $line);
+
+
       // break apart the line into the tag and value but ignore any escaped colons
       preg_replace("/\\:/", "|-|-|", $line); // temporarily replace escaped colons
       $pair = explode(":", $line, 2);
@@ -2257,7 +2295,7 @@ class OBOImporter extends TripalImporter {
    * @return
    *   A Chado database object.
    */
-  private function addDB($dbname) {
+  private function addDB($dbname, $url = '',  $description = '') {
     // Add the database if it doesn't exist.
     $db = NULL;
     if (array_key_exists($dbname, $this->all_dbs)) {
@@ -2266,7 +2304,14 @@ class OBOImporter extends TripalImporter {
     else {
       // If it's not in the cache we can assume it doesn't exist and insert.
       $db = new ChadoRecord('db');
-      $db->setValues(['name' => $dbname]);
+      $values = ['name' => $dbname];
+      if ($url) {
+        $values['url'] = $url;
+      }
+      if ($description) {
+        $values['description'] = $description;
+      }
+      $db->setValues($values);
       $db->insert();
       $db = (object) $db->getValues();
       $this->all_dbs[$dbname] = $db;
@@ -2341,7 +2386,8 @@ class OBOImporter extends TripalImporter {
     }
 
     if (!$accession) {
-      throw new Exception("Cannot add an Alt ID without an accession: '$alt_id'");
+      $this->logMessage("Cannot add an Alt ID without an accession: '!alt_id'", ['!alt_id' => $alt_id]);
+      return;
     }
 
     // Add the database if it doesn't exist.