소스 검색

Fixes to OBO loader

spficklin 12 년 전
부모
커밋
9e47f0db1a
5개의 변경된 파일, 418개 추가 및 474개 삭제
  1. 2 1
      tripal_bulk_loader/tripal_bulk_loader.loader.inc
  2. 222 109
      tripal_cv/api/tripal_cv.api.inc
  3. 107 284
      tripal_cv/includes/obo_loader.inc
  4. 51 35
      tripal_db/tripal_db.api.inc
  5. 36 45
      tripal_feature/includes/gff_loader.inc

+ 2 - 1
tripal_bulk_loader/tripal_bulk_loader.loader.inc

@@ -282,7 +282,8 @@ function tripal_bulk_loader_load_data($nid, $job_id) {
     print "\t\tOpen File...\n";
     $file = new SplFileObject($node->file, 'r'); 
     if (!$file) { 
-      watchdog('T_bulk_loader','Could not open file %file', array($node->file), WATCHDOG_ERROR);
+      watchdog('T_bulk_loader','Could not open file %file', 
+        array($node->file), WATCHDOG_ERROR);
       return;
     }
 

+ 222 - 109
tripal_cv/api/tripal_cv.api.inc

@@ -258,33 +258,41 @@ function tripal_cv_get_cvterm_options($cv_id = 0) {
  *
  * @ingroup tripal_cv_api
  */
-function tripal_cv_add_cv($name, $comment) {
-
+function tripal_cv_add_cv($name, $definition) {
+	
+	$ins_values = array(
+	  'name'       => $name,
+	  'definition' => $definition
+	);
+	
   // see if the CV (default-namespace) exists already in the database
-  $vocab = $name;
-  $remark = $comment;
-  $cv_sql = "SELECT * FROM {cv} WHERE name = '%s'";
-  $cv = db_fetch_object(db_query($cv_sql, $vocab));
-
-  // if the CV exists then update it, otherwise insert
-  if (!$cv) {
-    $sql = "INSERT INTO {cv} (name,definition) VALUES ('%s','%s')";
-    if (!db_query($sql, $vocab, $remark)) {
+  $sel_values = array('name' => $name);
+  $sel_options = array('statement_name' => 'sel_cv_na');
+  $results = tripal_core_chado_select('cv', array('*'), $sel_values, $sel_options);
+
+  // if it does not exist then add it
+  if (count($results) == 0) {  	
+    $ins_options = array('statement_name' => 'ins_cv_nade');
+  	$success = tripal_core_chado_insert('cv', $ins_values, $ins_options);
+    if (!$success) {
       watchdog('tripal_cv', "Failed to create the CV record", NULL, WATCHDOG_WARNING);
       return FALSE;
     }
-    $cv = db_fetch_object(db_query($cv_sql, $vocab));
+    $results = tripal_core_chado_select('cv', array('*'), $sel_values, $sel_options);
   }
+  // if it already exists then do an update
   else {
-    $sql = "UPDATE {cv} SET definition = '%s' WHERE name ='%s'";
-    if (!db_query($sql, $remark, $vocab)) {
+    $upd_options = array('statement_name' => 'upd_cv_nade');
+  	$success = tripal_core_chado_update('cv', $sel_values, $ins_values, $upd_options);
+    if (!$success) {
       watchdog('tripal_cv', "Failed to update the CV record", NULL, WATCHDOG_WARNING);
       return FALSE;
     }
-    $cv = db_fetch_object(db_query($cv_sql, $vocab));
+    $results = tripal_core_chado_select('cv', array('*'), $sel_values, $sel_options);
   }
 
-  return $cv;
+  // return the cv object
+  return $results[0];
 }
 
 /**
@@ -329,17 +337,21 @@ function tripal_cv_add_cv($name, $comment) {
  *
  * @ingroup tripal_cv_api
  */
-function tripal_cv_add_cvterm($term, $defaultcv='', $is_relationship = 0, $update = 1, $dbname=NULL) {
-
+function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship = 0, 
+  $update = 1, $dbname = 'internal') {
+  	
+  $connection = tripal_db_persistent_chado();
+  	
   // get the term properties
   $id = $term['id'];
   $name = $term['name'];
   $cvname = $term['namespace'];
   $definition = preg_replace('/^\"(.*)\"/', '\1', $term['def']);
   $is_obsolete = 0;
-  if (isset($term['is_obsolete']) and  strcmp($term['is_obsolete'], 'true') == 0) {
+  
+  if (isset($term['is_obsolete']) and strcmp($term['is_obsolete'], 'true') == 0) {
     $is_obsolete = 1;
-  }
+  }  
   if (!$name and !$id) {
     watchdog('tripal_cv', "Cannot find cvterm without 'id' or 'name'", NULL, WATCHDOG_WARNING);
     return 0;
@@ -353,57 +365,55 @@ function tripal_cv_add_cvterm($term, $defaultcv='', $is_relationship = 0, $updat
   if (!$cvname) {
     $cvname = $defaultcv;
   }
-  // make sure the CV name exists
-  $cv = tripal_cv_add_cv($cvname, '');
-  if (!$cv) {
-    watchdog('tripal_cv', "Cannot find namespace '$cvname' when adding/updating $id", NULL, WATCHDOG_WARNING);
-    return 0;
-  }
-
-  // this SQL statement will be used a lot to find a cvterm so just set it
-  // here for easy reference below.  Because CV terms can change their names
-  // but accessions don't change, the following SQL finds cvterms based on
-  // their accession rather than the name
-  $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, DB.name as dbname, DB.db_id
-                FROM {cvterm} CVT
-                  INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
-                  INNER JOIN {db} DB on DBX.db_id = DB.db_id
-                  INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
-                WHERE DBX.accession = '%s' and DB.name = '%s'";
-
-  // get the accession and the database from the cvterm
+  
+  // get the accession and the database from the cvterm id
   if ($dbname) {
     $accession = $id;
   }
-  elseif (preg_match('/^.+?:.*$/', $id)) {
+  
+  if (preg_match('/^.+?:.*$/', $id)) {
     $accession = preg_replace('/^.+?:(.*)$/', '\1', $id);
     $dbname = preg_replace('/^(.+?):.*$/', '\1', $id);
   }
+  
+  // check that we have a database name, give a different message if it's a relationship
   if ($is_relationship and !$dbname) {
-    $accession = $id;
-    // because this is a relationship cvterm first check to see if it
-    // exists in the relationship ontology. If it does then return the cvterm.
-    //  If not then set the dbname to _global and we'll add it or find it there
-    $cvterm = db_fetch_object(db_query($cvtermsql, $accession, 'OBO_REL'));
-    if ($cvterm) {
-      return $cvterm;
-    }
-    else {
-      // next check if this term is in the _global ontology.  If it is then
-      // return it no matter what the original CV
-      $dbname = '_global';
-
-      $cvterm = db_fetch_object(db_query($cvtermsql, $accesion, $dbname));
-      if ($cvterm) {
-        return $cvterm;
-      }
-    }
+    watchdog('tripal_cv', "A database name is not provided for this relationship term: $id", NULL, WATCHDOG_WARNING);
+    return 0; 
   }
   if (!$is_relationship and !$dbname) {
     watchdog('tripal_cv', "A database identifier is missing from the term: $id", NULL, WATCHDOG_WARNING);
     return 0;
   }
+  
+  // make sure the CV name exists
+  $cv = tripal_cv_add_cv($cvname, '');
+  if (!$cv) {
+    watchdog('tripal_cv', "Cannot find namespace '$cvname' when adding/updating $id", NULL, WATCHDOG_WARNING);
+    return 0;
+  }
 
+  // this SQL statement will be used a lot to find a cvterm so just set it
+  // here for easy reference below.  Because CV terms can change their names
+  // but accessions don't change, the following SQL finds cvterms based on
+  // their accession rather than the name
+  if (!tripal_core_is_sql_prepared('sel_cvterm_by_accession')) {
+	  $pcvtermsql = "
+	    PREPARE sel_cvterm_by_accession(text, text) AS
+	    SELECT CVT.name, CVT.cvterm_id, CV.cv_id, CV.name as cvname, 
+	      DB.name as dbname, DB.db_id, DBX.accession 
+	    FROM cvterm CVT
+	      INNER JOIN dbxref DBX on CVT.dbxref_id = DBX.dbxref_id
+	      INNER JOIN db DB on DBX.db_id = DB.db_id
+	      INNER JOIN cv CV on CV.cv_id = CVT.cv_id
+	    WHERE DBX.accession = $1 and DB.name = $2";
+	  if(!chado_query($pcvtermsql)){
+	    watchdog('tripal_cv', "Cannot prepare statement 'sel_cvterm_by_accession'", NULL, WATCHDOG_WARNING);
+      return 0; 
+	  }
+  } 
+  $cvtermsql = "EXECUTE sel_cvterm_by_accession('%s','%s')";  
+  
   // add the database. The function will just return the DB object if the
   // database already exists.
   $db = tripal_db_add_db($dbname);
@@ -412,28 +422,146 @@ function tripal_cv_add_cvterm($term, $defaultcv='', $is_relationship = 0, $updat
     return 0;
   }
 
-
-  // if the cvterm doesn't exist then add it otherwise just update it
-  $cvterm = db_fetch_object(db_query($cvtermsql, $accession, $dbname));
+  // the cvterm table has two unique dependencies. We need to check both.
+  // first check the (name, cv_id, is_obsolete) constraint
+  $values = array(
+     'name' => $name,
+     'is_obsolete' => $is_obsolete,
+     'cv_id' => array(
+       'name' => $cvname,
+     ),
+  );
+  $options = array('statement_name' => 'sel_cvterm_c1');
+  $result = tripal_core_chado_select('cvterm', array('*'), $values, $options);
+  
+  // if the constraint is met then let's check it to see if
+  // the database name matches the one we have been provided
+  if (count($result) == 1) {
+  	$cvterm = $result[0];
+  	
+  	// get the dbxref
+    $values = array('dbxref_id' => $cvterm->dbxref_id);
+    $options = array('statement_name' => 'sel_dbxref_id');
+    $result = tripal_core_chado_select('dbxref', array('*'), $values, $options);
+    $dbxref = $result[0];
+    
+    // get the db
+    $values = array('db_id' => $dbxref->db_id);
+    $options = array('statement_name' => 'sel_db_id');
+    $result = tripal_core_chado_select('db', array('*'), $values, $options);
+    $db_check = $result[0];
+    
+    // the database name for this existing term does not match that of the 
+    // one provided to this function.  The CV name matches otherwise we
+    // wouldn't have made it this far. So, let's swap the database for
+    // this term
+    if ($db_check->name != $db->name) {
+    	
+    	// look to see if the correct dbxref record already exists for 
+    	// this database
+    	$values = array(
+    	  'db_id' => $db->db_id,
+    	  'accession' => $accession,    	 
+    	);
+    	$options = array('staement_name' => 'sel_dbxref_idac');
+    	$result = tripal_core_chado_select('dbxref', array('*'), $values, $options);
+    	
+    	// if we already have a good dbxref then we want to update our cvterm to use this dbxref
+    	if (count($result) > 0) {
+    		$dbxref = $result[0];
+    	  $match = array('cvterm_id' => $cvterm->cvterm_id);
+	      $values = array('dbxref_id' => $dbxref->dbxref_id);
+	      $options = array('statement_name' => 'upd_cvterm_db');
+	      $success = tripal_core_chado_update('cvterm', $match, $values, $options); 
+	      if (!$success) {
+	        watchdog('tripal_cv', "Failed to correct the dbxref id for the $cvterm, " .
+	          "$name (id: $accession), for database $dbname", NULL, WATCHDOG_WARNING);
+	        return 0;
+	      }
+    	}
+    	// if we don't have the record then we want to delete our cvterm and let the code
+    	// below recreate it with the correct info 
+    	else {
+        $match = array('cvterm_id' => $cvterm->cvterm_id);
+    		tripal_core_chado_delete('cvterm', $match);
+    	}    	
+    }
+    
+    // check that the accession matches.  Sometimes an OBO can define the same term 
+    // multiple times but with different accessions.  If this is the case we
+    // can't do an insert or it will violate the constraint in the cvterm table.
+    // so we'll need to add the record to the cvterm_dbxref table instead
+    if ($dbxref->accession != $accession) {
+    	
+    	// get/add the dbxref for this term
+      $dbxref_new =  tripal_db_add_dbxref($db->db_id, $accession);
+      if (!$dbxref_new) {
+        watchdog('tripal_cv', "Failed to find or insert the dbxref record for cvterm, " .
+          "$name (id: $accession), for database $dbname", NULL, WATCHDOG_WARNING);
+        return 0;
+      }
+      
+      // check to see if the cvterm_dbxref record already exists
+      $values = array(
+        'cvterm_id' => $cvterm->cvterm_id,
+        'dbxref_id' => $dbxref_new->dbxref_id,
+        'is_for_definition' => 1,
+      );
+      $options = array('statement_name' => 'sel_cvtermdbxref_cvdbis');      
+      $result = tripal_core_chado_select('cvterm_dbxref', array('*'), $values, $options);
+      
+      // if the cvterm_dbxref record does not exist then add it 
+      if (count($result)==0) {
+      	$options = array('statement_name' => 'ins_cvtermdbxref_cvdbis');
+	      $success = tripal_core_chado_insert('cvterm_dbxref', $values, $options);
+	      if(!$success){
+	        watchdog('tripal_cv', "Failed to find or insert the cvterm_dbxref record for a ". 
+	          "duplicated cvterm:  $name (id: $accession), for database $dbname", NULL, WATCHDOG_WARNING);
+	        return 0;
+	      }
+      }	      
+	    // get the original cvterm with the same name and return that.
+	    $cvterm = db_fetch_object(chado_query($cvtermsql, $dbxref->accession, $dbname));
+	    return $cvterm;
+    }
+    
+    // continue on, we've fixed the record if the db_id did not match, 
+    // we can now perform an update if we need to.
+  }
+  
+  // get the CVterm record
+  $cvterm = db_fetch_object(chado_query($cvtermsql, $accession, $dbname)); 
+  //print "$pcvtermsql\n$cvtermsql\n$accession, $dbname\n";
+  //print "CVTERM:\n";
+  //print_r($cvterm);
   if (!$cvterm) {
-    // check to see if the dbxref exists if not, add it
+
+  	// check to see if the dbxref exists if not, add it
     $dbxref =  tripal_db_add_dbxref($db->db_id, $accession);
     if (!$dbxref) {
-      watchdog('tripal_cv', "Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname", NULL, WATCHDOG_WARNING);
+      watchdog('tripal_cv', "Failed to find or insert the dbxref record for cvterm, " .
+        "$name (id: $accession), for database $dbname", NULL, WATCHDOG_WARNING);
       return 0;
     }
-
+    
     // check to see if the dbxref already has an entry in the cvterm table
-    $sql = "SELECT * FROM {cvterm} WHERE dbxref_id = %d";
-    $check = db_fetch_object(db_query($sql, $dbxref->dbxref_id));
-    if (!$check) {
+    // this is the second constraint in the cvterm table
+    $values = array('dbxref_id' => $dbxref->dbxref_id);
+    $options = array('statement_name' => 'sel_cvterm_db');
+    $check = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
+    if (count($check) == 0) {    	
       // now add the cvterm
-      $sql = "
-        INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
-           is_obsolete, is_relationshiptype)
-        VALUES (%d,'%s','%s',%d,%d,%d)
-      ";
-      if (!db_query($sql, $cv->cv_id, $name, $definition, $dbxref->dbxref_id, $is_obsolete, $is_relationship)) {
+      $ins_values = array(
+        'cv_id'                => $cv->cv_id,
+        'name'                 => $name,
+        'definition'           => $definition,
+        'dbxref_id'            => $dbxref->dbxref_id,
+        'is_obsolete'          => $is_obsolete,
+        'is_relationshiptype'  => $is_relationship, 
+      );
+      $ins_options = array('statement_name' => 'ins_cvterm_all');
+      $success = tripal_core_chado_insert('cvterm', $ins_values, $ins_options);
+      if (!$success) {
         if (!$is_relationship) {
           watchdog('tripal_cv', "Failed to insert the term: $name ($dbname)", NULL, WATCHDOG_WARNING);
           return 0;
@@ -444,47 +572,32 @@ function tripal_cv_add_cvterm($term, $defaultcv='', $is_relationship = 0, $updat
         }
       }
     }
-    // if the dbxref already exists check to make sure it exists for the correct databaes name
-    // if it does then we're good and we don't need to do anything
-    elseif ($check and strcmp($check->name, $name) == 0) {
-      // this entry already exists. We're good, so do nothing
-    }
-    // if the dbxref exists but does not map to the same database name
-    elseif ($check and strcmp($check->name, $name) != 0) {
-      watchdog(
-        'tripal_cv',
-        "The dbxref already exists in the cvterm table. DBXREF ID: $dbxref->dbxref_id, ACCESSION: $accession. DB:  '" . $dbxref->db_name . "'. term '$name'. The requested db was '$dbname'",
-        NULL,
-        WATCHDOG_WARNING);
-      return FALSE;
-    }
-    $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-    if (!$is_relationship) {
-      drupal_set_message("Added CV term: $name ($dbname)");
-    }
+    // this dbxref already exists in the cvterm table 
     else {
-      drupal_set_message("Added relationship CV term: $name ($dbname)");
-    }
-    return $cvterm;
+      watchdog('tripal_cv', "The dbxref already exists for another cvterm record: $name (cv: " . $cvname . " db: $dbname)", NULL, WATCHDOG_WARNING);
+      return 0;
+    }    
+    $cvterm = db_fetch_object(chado_query($cvtermsql, $accession, $dbname));
   }
-  elseif ($update) { // update the cvterm
-    $sql = "
-       UPDATE {cvterm} SET name='%s', definition='%s',
-          is_obsolete = %d, is_relationshiptype = %d
-       WHERE cvterm_id = %d
-    ";
-    if (!db_query($sql, $term['name'], $definition, $is_obsolete, $is_relationship, $cvterm->cvterm_id)) {
+  // update the cvterm
+  elseif ($update) { 
+  	$match = array('cvterm_id' => $cvterm->cvterm_id);
+  	$upd_values = array(
+        'name'                => $name,
+        'definition'          => $definition,
+        'is_obsolete'         => $is_obsolete,
+        'is_relationshiptype' => $is_relationship, 
+    );
+    $upd_options = array('statement_name' => 'upd_cvterm_nadeisis');
+    $success = tripal_core_chado_update('cvterm', $match, $upd_values, $upd_options);    
+    if (!$success) {
       watchdog('tripal_cv', "Failed to update the term: $name", NULL, WATCHDOG_WARNING);
       return FALSE;
     }
-    $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-    if (!$is_relationship) {
-      drupal_set_message("Updated CV term: $name ($dbname)");
-    }
-    else {
-      drupal_set_message("Updated relationship CV term: $name ($dbname)");
-    }
-    return $cvterm;
+    $cvterm = db_fetch_object(chado_query($cvtermsql, $accession, $dbname));
+  } 
+  else {
+     // do nothing, we have the cvterm but we don't want to update
   }
   // return the cvterm
   return $cvterm;

+ 107 - 284
tripal_cv/includes/obo_loader.inc

@@ -114,7 +114,7 @@ function tripal_cv_load_update_cvtermpath($newcvs, $jobid) {
 }
 
 /**
- *
+ * Add the obo to the tripal_cv_obo table in the Drupal database
  */
 function tripal_cv_load_obo_add_ref($name, $path) {
   $isql = "INSERT INTO {tripal_cv_obo} (name,path) VALUES ('%s','%s')";
@@ -126,29 +126,30 @@ function tripal_cv_load_obo_add_ref($name, $path) {
  * @ingroup tripal_obo_loader
  */
 function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
-  global $_tripal_cv_previous_db;
+  
+  // we need to get a persistent connection.  If one exists this function
+  // will not recreate it, but if not it will create one and store it in
+  // a Drupal variable for reuse later.
+  $connection = tripal_db_persistent_chado();
 
   $header = array();
   $obo = array();
 
-  print "Opening File $file\n";
-
-  // set the search path
-  $_tripal_cv_previous_db = tripal_db_set_active('chado');
+  print "Opening File $file\n";  
 
   // make sure we have an 'internal' and a '_global' database
-  if (!tripal_cv_obo_add_db('internal')) {
+  if (!tripal_db_add_db('internal')) {
     tripal_cv_obo_quiterror("Cannot add 'internal' database");
   }
-  if (!tripal_cv_obo_add_db('_global')) {
+  if (!tripal_db_add_db('_global')) {
     tripal_cv_obo_quiterror("Cannot add '_global' database");
   }
 
   // parse the obo file
-  tripal_cv_obo_parse($file, $obo, $header);
+  $default_db = tripal_cv_obo_parse($file, $obo, $header);
 
   // add the CV for this ontology to the database
-  $defaultcv = tripal_cv_obo_add_cv($header['default-namespace'][0], '');
+  $defaultcv = tripal_cv_add_cv($header['default-namespace'][0], '');
   if (!$defaultcv) {
     tripal_cv_obo_quiterror('Cannot add namespace ' . $header['default-namespace'][0]);
   }
@@ -157,16 +158,21 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   // add any typedefs to the vocabulary first
   $typedefs = $obo['Typedef'];
   foreach ($typedefs as $typedef) {
-    tripal_cv_obo_process_term($typedef, $defaultcv, $obo, 1, $newcvs);
-  }
-
+  	$t = array();
+  	$t['id']     = $typedef['id'][0];
+  	$t['name']   = $typedef['name'][0];
+  	$t['def']    = $typedef['def'][0];
+  	$t['subset'] = $typedef['subset'][0];
+    tripal_cv_obo_process_term($t, $defaultcv->name, $obo, 1, $newcvs, $default_db);
+  }
+  
   // next add terms to the vocabulary
   $terms = $obo['Term'];
-  if (!tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid, $newcvs)) {
+  if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
     tripal_cv_obo_quiterror('Cannot add terms from this ontology');
   }
 
-  return tripal_cv_obo_loader_done();
+  return;
 }
 
 /**
@@ -175,8 +181,7 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
  */
 function tripal_cv_obo_quiterror($message) {
 
-  print "ERROR: $message\n";
-  db_query("set search_path to public");
+  watchdog("T_obo_loader",$message, array(), WATCHDOG_ERROR);;
   exit;
 
 }
@@ -185,20 +190,7 @@ function tripal_cv_obo_quiterror($message) {
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_loader_done() {
-
-  // return the search path to normal
-  tripal_db_set_active($_tripal_cv_previous_db);
-  db_query("set search_path to public");
-
-  return '';
-}
-
-/**
- *
- * @ingroup tripal_obo_loader
- */
-function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs) {
+function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
 
   $i = 0;
   $count = sizeof($terms);
@@ -207,20 +199,39 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
     $interval = 1;
   }
 
+  // iterate through each term from the OBO file and add it
+  print "Loading terms...\n";
   foreach ($terms as $term) {
 
     // update the job status every 1% terms
     if ($jobid and $i % $interval == 0) {
-      tripal_job_set_progress($jobid, intval(($i / $count) * 50)); // we mulitply by 50 because parsing and loacing cvterms
-                                                                   // is only the first half.  The other half is updating
-    }                                                              // the cvtermpath table.
-
-    if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs)) {
+    	$complete = ($i / $count) * 100;
+      tripal_job_set_progress($jobid, intval($complete)); 
+      printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());                                                             
+    }                                 
+    
+    // add/update this term
+    $t = array();
+    $t['id']          = $term['id'][0];
+    $t['name']        = $term['name'][0];
+    $t['def']         = $term['def'][0];
+    $t['subset']      = $term['subset'][0];
+    $t['namespace']   = $term['namespace'][0];
+    $t['is_obsolete'] = $term['is_obsolete'][0];    
+    if (!tripal_cv_obo_process_term($t, $defaultcv, $obo, 0, $newcvs, $default_db)) {
       tripal_cv_obo_quiterror("Failed to process terms from the ontology");
     }
 
     $i++;
   }
+  
+  // set the final status
+  if ($jobid) {
+    $complete = ($i / $count) * 100;
+    tripal_job_set_progress($jobid, intval($complete)); 
+    printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
+  }                                                             
+  
   return 1;
 }
 
@@ -228,15 +239,15 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs) {
+function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {
 
   // add the cvterm
-  $cvterm = tripal_cv_obo_add_cvterm($term, $defaultcv, $is_relationship, 1);
+  $cvterm = tripal_cv_add_cvterm($term, $defaultcv, $is_relationship, 1, $default_db);
   if (!$cvterm) {
-    tripal_cv_obo_quiterror("Cannot add the term " . $term['id'][0]);
+    tripal_cv_obo_quiterror("Cannot add the term " . $term['id']);
   }
-  if ($term['namespace'][0]) {
-    $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
+  if ($term['namespace']) {
+    $newcvs[$term['namespace']] = $cvterm->cv_id;
   }
 
   // now handle other properties
@@ -361,55 +372,6 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
   return 1;
 }
 
-/**
- * Add a database record
- * @ingroup tripal_obo_loader
- */
-function tripal_cv_obo_add_db($dbname) {
-
-  $db_sql = "SELECT * FROM {db} WHERE name ='%s'";
-  $db = db_fetch_object(db_query($db_sql, $dbname));
-  if (!$db) {
-    if (!db_query("INSERT INTO {db} (name) VALUES ('%s')", $dbname)) {
-      tripal_cv_obo_quiterror("Cannot create '$dbname' db in Chado.");
-    }
-    $db = db_fetch_object(db_query($db_sql, $dbname));
-  }
-
-  return $db;
-}
-
-/**
- * Add a controlled vocab record
- * @ingroup tripal_obo_loader
- */
-function tripal_cv_obo_add_cv($name, $comment) {
-
-  // see if the CV (default-namespace) exists already in the database
-  $vocab = $name;
-  $remark = $comment;
-  $cv_sql = "SELECT * FROM {cv} WHERE name = '%s'";
-  $cv = db_fetch_object(db_query($cv_sql, $vocab));
-
-  // if the CV exists then update it, otherwise insert
-  if (!$cv) {
-    $sql = "INSERT INTO {cv} (name,definition) VALUES ('%s','%s')";
-    if (!db_query($sql, $vocab, $remark)) {
-      tripal_cv_obo_quiterror("Failed to create the CV record");
-    }
-    $cv = db_fetch_object(db_query($cv_sql, $vocab));
-  }
-  else {
-    $sql = "UPDATE {cv} SET definition = '%s' WHERE name ='%s'";
-    if (!db_query($sql, $remark, $vocab)) {
-      tripal_cv_obo_quiterror("Failed to update the CV record");
-    }
-    $cv = db_fetch_object(db_query($cv_sql, $vocab));
-  }
-
-  return $cv;
-}
-
 /**
  * Add a cvterm relationship
  *
@@ -424,7 +386,7 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $objnam
     'definition' => array(''),
     'is_obsolete' => array(0),
   );
-  $relcvterm = tripal_cv_obo_add_cvterm($term, $defaultcv, 1, 0);
+  $relcvterm = tripal_cv_add_cvterm($term, $defaultcv, 1, 0);
   if (!$relcvterm) {
     tripal_cv_obo_quiterror("Cannot find or insert the relationship term: $rel\n");
   }
@@ -434,17 +396,25 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $objnam
   if (!$objterm) {
     tripal_cv_obo_quiterror("Could not find object term $objname\n");
   }
-  $objcvterm = tripal_cv_obo_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1);
+  $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1);
   if (!$objcvterm) {
     tripal_cv_obo_quiterror("Cannot add/find cvterm");
   }
 
   // check to see if the cvterm_relationship already exists, if not add it
-  $cvrsql = "SELECT * FROM {cvterm_relationship} WHERE type_id = %d and subject_id = %d and object_id = %d";
-  if (!db_fetch_object(db_query($cvrsql, $relcvterm->cvterm_id, $cvterm->cvterm_id, $objcvterm->cvterm_id))) {
+  $values = array(
+    'type_id'    => $relcvterm->cvterm_id,
+    'subject_id' => $cvterm->cvterm_id,
+    'object_id'  => $objcvterm->cvterm_id
+  );
+  $options = array('statement_name' => 'sel_cvtermrelationship_tysuob');
+  $result = tripal_core_chado_select('cvterm_relationship', array('*'), $values, $options);
+  if (count($result) == 0) {
+  	$options = array('statement_name' => 'ins_cvtermrelationship_tysuob');
     $sql = "INSERT INTO {cvterm_relationship} ".
            "(type_id,subject_id,object_id) VALUES (%d,%d,%d)";
-    if (!db_query($sql, $relcvterm->cvterm_id, $cvterm->cvterm_id, $objcvterm->cvterm_id)) {
+    $success = tripal_core_chado_insert('cvterm_relationship', $values, $options);
+    if (!$success) {
       tripal_cv_obo_quiterror("Cannot add term relationship: '$cvterm->name' $rel '$objcvterm->name'");
     }
   }
@@ -476,32 +446,28 @@ function tripal_cv_obo_get_term($obo, $id) {
 function tripal_cv_obo_add_synonyms($term, $cvterm) {
 
   // make sure we have a 'synonym_type' vocabulary
-  $sql = "SELECT * FROM {cv} WHERE name='synonym_type'";
-  $syncv = db_fetch_object(db_query($sql));
-  if (!$syncv) {
-    $sql = "INSERT INTO {cv} (name,definition) VALUES ('synonym_type','')";
-    if (!db_query($sql)) {
-      tripal_cv_obo_quiterror("Failed to add the synonyms type vocabulary");
-    }
-    $syncv = db_fetch_object(db_query($sql));
-  }
+  $syncv = tripal_cv_add_cv('synonym_type');
 
   // now add the synonyms
   if (isset($term['synonym'])) {
     foreach ($term['synonym'] as $synonym) {
+    	
       // separate out the synonym definition and the synonym type
       $def = preg_replace('/^\s*"(.*)"\s*.*$/', '\1', $synonym);
       $type = drupal_strtolower(preg_replace('/^.*"\s+(.*?)\s+.*$/', '\1', $synonym));
 
       // make sure the synonym type exists in the 'synonym_type' vocabulary
-      $cvtsql = "
-        SELECT *
-        FROM {cvterm} CVT
-           INNER JOIN {cv} CV ON CVT.cv_id = CV.cv_id
-        WHERE CVT.name = '%s' and CV.name = '%s'
-      ";
-      $syntype = db_fetch_object(db_query($cvtsql, $type, 'synonym_type'));
-      if (!$syntype) {
+      $values = array(
+        'name' => $type,
+        'cv_id' => array(
+          'name' => 'synonym_type',
+        ),
+      );
+      $options = array('statement_name' => 'sel_cvterm_nacv');
+      $results = tripal_core_chado_select('cvterm', array('*'), $values, $options);
+
+      // if it doesn't exist then add it
+      if (count($results) == 0) {
         // build a 'term' object so we can add the missing term
         $term = array(
            'name' => array($type),
@@ -509,23 +475,28 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
            'definition' => array(''),
            'is_obsolete' => array(0),
         );
-        $syntype = tripal_cv_obo_add_cvterm($term, $syncv, 0, 1);
+        $syntype = tripal_cv_add_cvterm($term, $syncv, 0, 1);
         if (!$syntype) {
           tripal_cv_obo_quiterror("Cannot add synonym type: internal:$type");
         }
       }
 
       // make sure the synonym doesn't already exist
-      $sql = "
-        SELECT *
-        FROM {cvtermsynonym}
-        WHERE cvterm_id = %d and synonym = '%s'
-      ";
-      $syn = db_fetch_object(db_query($sql, $cvterm->cvterm_id, $def));
-      if (!$syn) {
-        $sql = "INSERT INTO {cvtermsynonym} (cvterm_id,synonym,type_id)
-                VALUES(%d,'%s',%d)";
-        if (!db_query($sql, $cvterm->cvterm_id, $def, $syntype->cvterm_id)) {
+      $values = array(
+        'cvterm_id' => $cvterm->cvterm_id, 
+        'synonym' => $def
+      );
+      $options = array('statement_name' => 'sel_cvtermsynonym_cvsy');
+      $results = tripal_core_chado_select('cvtermsynonym', array('*'), $values, $options);
+      if (count($results) == 0) {
+      	$values = array(
+          'cvterm_id' => $cvterm->cvterm_id, 
+          'synonym' => $def,
+      	  'type_id' => $syntype->cvterm_id
+        );
+      	$options = array('statement_name' => 'ins_cvtermsynonym_cvsy');
+      	$success = tripal_core_chado_insert('cvtermsynonym', $values, $options);
+        if (!$success) {
           tripal_cv_obo_quiterror("Failed to insert the synonym for term: $name ($def)");
         }
       }
@@ -559,6 +530,7 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
   $i = 0;
   $in_header = 1;
   $stanza = array();
+  $default_db = '_global';
 
   // iterate through the lines in the OBO file and parse the stanzas
   $fh = fopen($obo_file, 'r');
@@ -605,6 +577,13 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
     $pair = explode(":", $line, 2);
     $tag = $pair[0];
     $value = ltrim(rtrim($pair[1]));// remove surrounding spaces
+    
+    // look for the default DB
+    $matches = array();
+    if ($tag == 'id' and preg_match('/^(.+?):.*$/', $value, $matches)) {
+       $default_db = $matches[1];
+    }
+    
     $tag = preg_replace("/\|-\|-\|/", "\:", $tag); // return the escaped colon
     $value = preg_replace("/\|-\|-\|/", "\:", $value);
     if ($in_header) {
@@ -632,6 +611,7 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
       array_merge($obo[$type][$stanza['id'][0]], $stanza);
     }
   }
+  return $default_db;
 }
 
 /**
@@ -658,8 +638,8 @@ function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
     $dbname = 'URL';
   }
 
-  // check to see if the database exists
-  $db = tripal_cv_obo_add_db($dbname);
+  // add the database
+  $db = tripal_db_add_db($dbname);
   if (!$db) {
     tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
   }
@@ -690,7 +670,7 @@ function tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref) {
 function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
 
   // make sure the 'cvterm_property_type' CV exists
-  $cv = tripal_cv_obo_add_cv('cvterm_property_type', '');
+  $cv = tripal_cv_add_cv('cvterm_property_type', '');
   if (!$cv) {
     tripal_cv_obo_quiterror("Cannot add/find cvterm_property_type cvterm");
   }
@@ -709,7 +689,7 @@ function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
       'definition' => array(''),
       'is_obsolete' => array(0),
     );
-    $cvproptype = tripal_cv_obo_add_cvterm($term, $cv, 0, 0);
+    $cvproptype = tripal_cv_add_cvterm($term, $cv, 0, 0);
     if (!$cvproptype) {
       tripal_cv_obo_quiterror("Cannot add cvterm property: internal:$property");
     }
@@ -733,163 +713,6 @@ function tripal_cv_obo_add_cvterm_prop($cvterm, $property, $value, $rank) {
 
 }
 
-/**
- *
- * @ingroup tripal_obo_loader
- */
-function tripal_cv_obo_add_cvterm($term, $defaultcv, $is_relationship = 0, $update = 1) {
-
-  // get the term properties
-  $id = $term['id'][0];
-  $name = $term['name'][0];
-  $cvname = $term['namespace'][0];
-  $definition = preg_replace('/^\"(.*)\"/', '\1', $term['def'][0]);
-  $is_obsolete = 0;
-  if (isset($term['is_obsolete'][0]) and  strcmp($term['is_obsolete'][0], 'true') == 0) {
-    $is_obsolete = 1;
-  }
-  if (!$cvname) {
-    $cvname = $defaultcv->name;
-  }
-
-  // make sure the CV name exists
-  $cv = tripal_cv_obo_add_cv($cvname, '');
-  if (!$cv) {
-    tripal_cv_obo_quiterror("Cannot find namespace '$cvname' when adding/updating $id");
-  }
-
-  // this SQL statement will be used a lot to find a cvterm so just set it
-  // here for easy reference below.
-  $cvtermsql = "SELECT CVT.name, CVT.cvterm_id, DB.name as dbname, DB.db_id
-                FROM {cvterm} CVT
-                  INNER JOIN {dbxref} DBX on CVT.dbxref_id = DBX.dbxref_id
-                  INNER JOIN {db} DB on DBX.db_id = DB.db_id
-                  INNER JOIN {cv} CV on CV.cv_id = CVT.cv_id
-                WHERE CVT.name = '%s' and DB.name = '%s'";
-
-  // get the accession and the database from the cvterm
-  if (preg_match('/^.+?:.*$/', $id)) {
-    $accession = preg_replace('/^.+?:(.*)$/', '\1', $id);
-    $dbname = preg_replace('/^(.+?):.*$/', '\1', $id);
-  }
-  if ($is_relationship and !$dbname) {
-    $accession = $id;
-    // because this is a relationship cvterm first check to see if it
-    // exists in the relationship ontology. If it does then return the cvterm.
-    //  If not then set the dbname to _global and we'll add it or find it there
-    $cvterm = db_fetch_object(db_query($cvtermsql, $name, 'OBO_REL'));
-    if ($cvterm) {
-      return $cvterm;
-    }
-    else {
-      // next check if this term is in the _global ontology.  If it is then
-      // return it no matter what the original CV
-      $dbname = '_global';
-
-      $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-      if ($cvterm) {
-        return $cvterm;
-      }
-    }
-  }
-  if (!$is_relationship and !$dbname) {
-    tripal_cv_obo_quiterror("A database identifier is missing from the term: $id");
-  }
-
-  // check to see if the database exists.
-  $db = tripal_cv_obo_add_db($dbname);
-  if (!$db) {
-    tripal_cv_obo_quiterror("Cannot find database '$dbname' in Chado.");
-  }
-
-
-  // if the cvterm doesn't exist then add it otherwise just update it
-  $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-  if (!$cvterm) {
-    // check to see if the dbxref exists if not, add it
-    $dbxref =  tripal_cv_obo_add_dbxref($db->db_id, $accession);
-    if (!$dbxref) {
-      tripal_cv_obo_quiterror("Failed to find or insert the dbxref record for cvterm, $name (id: $accession), for database $dbname");
-    }
-
-    // check to see if the dbxref already has an entry in the cvterm table
-    $sql = "SELECT * FROM {cvterm} WHERE dbxref_id = %d";
-    $check = db_fetch_object(db_query($sql, $dbxref->dbxref_id));
-
-    if (!$check) {
-      // now add the cvterm
-      $sql = "
-          INSERT INTO {cvterm} (cv_id, name, definition, dbxref_id,
-             is_obsolete, is_relationshiptype)
-          VALUES (%d,'%s','%s',%d,%d,%d)
-      ";
-      if (!db_query($sql, $cv->cv_id, $name, $definition,
-        $dbxref->dbxref_id, $is_obsolete, $is_relationship)) {
-        if (!$is_relationship) {
-          tripal_cv_obo_quiterror("Failed to insert the term: $name ($dbname)");
-        }
-        else {
-          tripal_cv_obo_quiterror("Failed to insert the relationship term: $name (cv: " . $cvname . " db: $dbname)");
-        }
-      }
-      if (!$is_relationship) {
-        print "Added CV term: $name ($dbname)\n";
-      }
-      else {
-        print "Added relationship CV term: $name ($dbname)\n";
-      }
-    }
-    elseif ($check and strcmp($check->name, $name)!=0) {
-      // this dbxref_id alrady exists in the database but the name is
-      // different.  We will trust that the OBO is correct and that there
-      // has been a name change for this dbxref, so we'll update
-      $sql = "
-          UPDATE {cvterm} SET name = '%s', definition = '%s',
-             is_obsolete = %d, is_relationshiptype = %d
-          WHERE dbxref_id = %d
-      ";
-      if (!db_query($sql, $name, $definition, $is_obsolete, $is_relationship, $dbxref->dbxref_id)) {
-        if (!$is_relationship) {
-          tripal_cv_obo_quiterror("Failed to update the term: $name ($dbname)");
-        }
-        else {
-          tripal_cv_obo_quiterror("Failed to update the relationship term: $name (cv: " . $cvname . " db: $dbname)");
-        }
-      }
-      if (!$is_relationship) {
-        print "Updated CV term: $name ($dbname)\n";
-      }
-      else {
-        print "Updated relationship CV term: $name ($dbname)\n";
-      }
-    }
-    elseif ($check and strcmp($check->name, $name)==0) {
-      // this entry already exists. We're good, so do nothing
-    }
-    $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-  }
-  elseif ($update) { // update the cvterm
-    $sql = "
-       UPDATE {cvterm} SET name='%s', definition='%s',
-          is_obsolete = %d, is_relationshiptype = %d
-       WHERE cvterm_id = %d
-    ";
-    if (!db_query($sql, $term['name'][0], $definition,
-      $is_obsolete, $is_relationship, $cvterm->cvterm_id)) {
-      tripal_cv_obo_quiterror("Failed to update the term: $name");
-    }
-    $cvterm = db_fetch_object(db_query($cvtermsql, $name, $dbname));
-    if (!$is_relationship) {
-      print "Updated CV term: $name ($dbname)\n";
-    }
-    else {
-      print "Updated relationship CV term: $name ($dbname)\n";
-    }
-  }
-
-  // return the cvterm
-  return $cvterm;
-}
 
 /**
  * Add Database Reference

+ 51 - 35
tripal_db/tripal_db.api.inc

@@ -294,67 +294,83 @@ function tripal_db_get_dbxref_by_accession($accession, $db_id=0) {
  *   then nothing is added.
  *
  * @return
- *   An object populated with fields from the newly added database.
+ *   An object populated with fields from the newly added database.  If the 
+ *   database already exists it returns the values in the current entry.
  *
  * @ingroup tripal_db_api
  */
-function tripal_db_add_db($dbname, $description='', $url='', $urlprefix='', $update=0) {
+function tripal_db_add_db($dbname, $description = '', $url = '',
+  $urlprefix = '', $update = 0) {
 
-  $values = array(
+  // values written on insert and, when $update is set, on update
+  $ins_values = array(
     'name' => $dbname,
     'description' => $description,
     'url' => $url,
     'urlprefix' => $urlprefix
   );
-
-  $db_sql = "SELECT * FROM {db} WHERE name ='%s'";
-  $db = db_fetch_object(db_query($db_sql, $dbname));
-  if (!$db) {
-    if (!tripal_core_chado_insert('db', $values)) {
+
+  // look the database record up by name; the same criteria are reused below
+  $sel_values = array('name' => $dbname);
+  $sel_options = array('statement_name' => 'sel_db_na');
+  $result = tripal_core_chado_select('db', array('*'), $sel_values, $sel_options);
+
+  // if it does not exist then add it
+  if (count($result) == 0) {
+    $ins_options = array('statement_name' => 'ins_db_nadeurur');
+    $success = tripal_core_chado_insert('db', $ins_values, $ins_options);
+    if (!$success) {
       watchdog('tripal_db', "Cannot create db '$dbname'.", NULL, WATCHDOG_WARNING);
       return 0;
     }
-    $db = tripal_core_chado_select('db', array('*'), $values);
+    $result = tripal_core_chado_select('db', array('*'), $sel_values, $sel_options);
   }
+  // if it exists and update is enabled then do the update
   elseif ($update) {
-    $match = array('db_id' => $db->db_id);
-    if (!tripal_core_chado_update('db', $match, $values)) {
+    $upd_options = array('statement_name' => 'upd_db_nadeurur');
+    $success = tripal_core_chado_update('db', $sel_values, $ins_values, $upd_options);
+    if (!$success) {
       watchdog('tripal_db', "Cannot update db '$dbname'.", NULL, WATCHDOG_WARNING);
       return 0;
     }
-    $db = tripal_core_chado_select('db', array('*'), $values);
-  }
-  else {
-    return $db;
+    $result = tripal_core_chado_select('db', array('*'), $sel_values, $sel_options);
   }
+
+  // return the database record, or 0 (like the failure paths above) when the
+  // lookup produced no row
+  return empty($result) ? 0 : $result[0];
 }
 /**
  *
  * @ingroup tripal_db_api
  */
-function tripal_db_add_dbxref($db_id, $accession, $version='', $description='') {
+function tripal_db_add_dbxref($db_id, $accession, $version = '', $description = '') {
 
-  // check to see if the dbxref exists if not, add it
-  $dbxsql = "
-    SELECT DBX.dbxref_id, DBX.db_id, DBX.description, DBX.version, DBX.accession,
-       DB.name as db_name
-    FROM {dbxref} DBX
-       INNER JOIN db DB on DB.db_id = DBX.db_id
-    WHERE DBX.db_id = %d and DBX.accession = '%s'
-  ";
-  $dbxref = db_fetch_object(db_query($dbxsql, $db_id, $accession));
-  if (!$dbxref) {
-    $sql = "
-       INSERT INTO {dbxref} (db_id, accession, version, description)
-       VALUES (%d,'%s','%s','%s')
-    ";
-    if (!db_query($sql, $db_id, $accession, $version, $description)) {
+  $ins_values = array(
+    'db_id'       => $db_id,
+    'accession'   => $accession,
+    'version'     => $version,
+    'description' => $description
+  );
+
+  // check to see if the dbxref exists.  NOTE(review): the statement name ends
+  // in "ve" but version is not part of the match -- confirm which is intended
+  $sel_values = array(
+    'db_id'     => $db_id,
+    'accession' => $accession,
+  );
+  $sel_options = array('statement_name' => 'sel_dbxref_dbacve');
+  $result = tripal_core_chado_select('dbxref', array('*'), $sel_values, $sel_options);
+  // if it doesn't already exist then add it
+  if (count($result) == 0) {
+    $ins_options = array('statement_name' => 'ins_dbxref_dbacvede');
+    $success = tripal_core_chado_insert('dbxref', $ins_values, $ins_options);
+    if (!$success) {
       watchdog('tripal_cv', "Failed to insert the dbxref record $accession", NULL, WATCHDOG_WARNING);
       return 0;
     }
-    drupal_set_message("Added Dbxref accession: $accession");
-    $dbxref = db_fetch_object(db_query($dbxsql, $db_id, $accession));
+    $result = tripal_core_chado_select('dbxref', array('*'), $sel_values, $sel_options);
   }
-  return $dbxref;
-
+  // hand back the record, or 0 when the lookup produced no row
+  return empty($result) ? 0 : $result[0];
 }

+ 36 - 45
tripal_feature/includes/gff_loader.inc

@@ -206,18 +206,7 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
  * @ingroup gff3_loader
  */
 function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id, 
-  $add_only =0, $update = 0, $refresh = 0, $remove = 0, $job = NULL) {
-
-  // Prepare log
-  $filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
-  $logfile = tempnam(sys_get_temp_dir(), "gff_loader_log.");
-  $log = fopen($logfile, 'a'); // append parsing results to log file
-  if (!$log) {
-    print "ERROR: cannot open log file: $logfile\n";
-    exit;
-  }
-  print "Writing to log file: $logfile\n";
-  fwrite($log, date("D M j G:i:s Y") . ". Loading $gff_file\n");
+  $add_only =0, $update = 0, $refresh = 0, $remove = 0, $job = NULL) {  
 
   // this array is used to cache all of the features in the GFF file and
   // used to lookup parent and target relationships
@@ -231,7 +220,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     $dfile = $gff_file;
   }
   if (!file_exists($dfile)) {
-    print "ERROR: cannot find the file: $dfile\n";
+    watchdog('T_gff_loader',"Cannot find the file: %dfile", 
+      array($dfile), WATCHDOG_ERROR);
     return 0;
   }
 
@@ -241,7 +231,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   //$lines = file($dfile,FILE_SKIP_EMPTY_LINES);
   $fh = fopen($dfile, 'r');
   if (!$fh) {
-    print "ERROR: cannot open file: $dfile\n";
+    watchdog('T_gff_loader',"cannot open file: %dfile", 
+      array($dfile), WATCHDOG_ERROR);
     return 0;
   }
   $filesize = filesize($dfile);
@@ -251,8 +242,9 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   // @coder-ignore: non-drupal schema thus table prefixing does not apply
   $sql = "SELECT * FROM cv WHERE name = '%s'";
   $cv = db_fetch_object(db_query($sql, 'sequence'));
-  if (!$cv) {
-    print "ERROR:  cannot find the 'sequence' ontology\n";
+  if (!$cv) {   
+    watchdog('T_gff_loader',"Cannot find the 'sequence' ontology", 
+      array($dfile), WATCHDOG_ERROR);
     return '';
   }
 
@@ -283,7 +275,6 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       $intv_read = 0;
       $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
       print "Parsing Line $line_num (" . $percent . "%).\r";
-      fwrite($log, "Parsing line $line_num (" . $percent . "%).\n");
       tripal_job_set_progress($job, intval(($num_read / $filesize) * 100));
     }
     
@@ -309,8 +300,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     // get the columns
     $cols = explode("\t", $line);
     if (sizeof($cols) != 9) {
-      fwrite($log, "ERROR: improper number of columns on line $line_num\n");
-      fwrite($log, print_r($cols,1));
+      watchdog('T_gff_loader','improper number of columns on line %line_num', 
+        array($line_num), WATCHDOG_ERROR);
       return '';
     }
     
@@ -359,7 +350,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
                WHERE CV.cv_id = $1 and (CVT.name = $2 or CVTS.synonym = $3)";
        $status = chado_query($psql);
        if (!$status) {
-         fwrite($log, "ERROR: cannot prepare statement 'sel_cvterm_cvid_cvtname_synonym' for ontology term $line_num\n");
+         watchdog('T_gff_loader','cannot prepare statement \'sel_cvterm_cvid_cvtname_synonym\' for ontology term %line_num', array($line_num), WATCHDOG_ERROR);
          return '';
       }
       
@@ -368,7 +359,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     $result = chado_query("EXECUTE sel_cvterm_cvid_cvtname_synonym (%d, '%s', '%s')", $cv->cv_id, $type, $type);
     $cvterm = db_fetch_object($result);
     if (!$cvterm) {
-      fwrite($log, "ERROR: cannot find ontology term '$type' on line $line_num.\n");
+      watchdog('T_gff_loader','cannot find ontology term \'%type\' on line %line_num', array($type, $line_num), WATCHDOG_ERROR);
       return '';
     }
 
@@ -391,7 +382,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         continue;
       }
       if (!preg_match('/^[^\=]+\=.+$/', $attr)) {
-        fwrite($log, "ERROR: attribute is not correctly formatted on line $line_num: $attr\n");
+        watchdog('T_gff_loader','Attribute is not correctly formatted on line %line_num: %attr', 
+          array($line_num, $attr), WATCHDOG_ERROR);
         return '';
       }
 
@@ -436,7 +428,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         $attr_uniquename = $tags['Parent'][0] . "-$type-$landmark:$fmin..$fmax";
       }
       else {
-        fwrite($log, "ERROR: cannot generate a uniquename for feature on line $line_num\n");
+        watchdog('T_gff_loader','Cannot generate a uniquename for feature on line %line_num', 
+          array($line_num), WATCHDOG_ERROR);
         exit;
       }
       $attr_name = $attr_uniquename;
@@ -467,15 +460,15 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       $options = array('statement_name' => 'sel_feature_organismid_uniquename');
       $count = tripal_core_chado_select('feature', $columns, $select, $options);
       if (!$count or $count[0]->num_landmarks == 0) {
-        fwrite($log,  "ERROR: the landmark '$landmark' cannot be found for this organism. ".
+        watchdog('T_gff_loader',"The landmark '%landmark' cannot be found for this organism. ".
               "Please add the landmark and then retry the import of this GFF3 ".
-              "file.\n");
+              "file", array($landmark), WATCHDOG_ERROR);
         return '';
 
       }
       if ($count[0]->num_landmarks > 1) {
-        fwrite($log,  "ERROR: the landmark '$landmark' is not unique for this organism. ".
-              "The features cannot be associated.\n");
+        watchdog('T_gff_loader',"The landmark '%landmark' is not unique for this organism. ".
+              "The features cannot be associated", array($landmark), WATCHDOG_ERROR);
         return '';
       }  
     }
@@ -483,7 +476,6 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     // if the option is to remove or refresh then we want to remove
     // the feature from the database.
     if ($remove or $refresh) {
-      fwrite($log,  "Removing feature '$attr_uniquename'\n");
       $sql = "DELETE FROM {feature}
               WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
       $match = array(
@@ -493,7 +485,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       );
       $result = tripal_core_chado_delete('feature',$match);
       if (!$result) {
-        fwrite($log,  "ERROR: cannot delete feature $attr_uniquename\n");
+        watchdog('T_gff_loader',"cannot delete feature %attr_uniquename", array($attr_uniquename), WATCHDOG_ERROR);
       }
       $feature = 0;
       unset($result);
@@ -627,7 +619,6 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
 
       // now iterate through the children and set their rank
       $rank = 1;
-      fwrite($log,  "Updating child ranks for $parent (" . $details['type'] . ")\n");
       foreach ($details['children'] as $kfeature_id => $kfmin) {
         $match = array(
            'object_id' => $pfeature->feature_id,
@@ -652,8 +643,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
 
   //tripal_db_set_active($previous_db);
   
-  print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
-  fwrite($log, "\n");
+  print "Done\n";
   fclose($log);
   
   return 1;
@@ -679,7 +669,9 @@ function tripal_feature_load_gff3_derives_from($feature, $subject, $gff_features
   $options = array('statement_name' => 'sel_feature_organismid_uniquename_typeid');
   $sfeature = tripal_core_chado_select('feature', array('*'), $match, $options);
   if (count($sfeature)==0) {
-    fwrite($log,  "ERROR: could not add 'Derives_from' relationship for $feature->uniquename and $subject.  Subject feature, '$subject', cannot be found\n");
+    watchdog('T_gff_loader',"Could not add 'Derives_from' relationship ".
+      "for %uniquename and %subject.  Subject feature, '%subject', ".
+      "cannot be found", array($feature->uniquename, $subject, $subject), WATCHDOG_ERROR);
     return;
   }
 
@@ -1137,16 +1129,16 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
   $feature = $result[0];
 
   if (strcmp($is_obsolete, 'f')==0 or $is_obsolete == 0) {
-    $is_obsolete = 'false';
+    $is_obsolete = 'FALSE';
   }
   if (strcmp($is_obsolete, 't')==0 or $is_obsolete == 1) {
-    $is_obsolete = 'true';
+    $is_obsolete = 'TRUE';
   }
   if (strcmp($is_analysis, 'f')==0 or $is_analysis == 0) {
-    $is_analysis = 'false'; 
+    $is_analysis = 'FALSE'; 
   }
   if (strcmp($is_analysis, 't')==0 or $is_analysis == 1) {
-    $is_analysis = 'true'; 
+    $is_analysis = 'TRUE'; 
   }
 
   // insert the feature if it does not exist otherwise perform an update
@@ -1346,20 +1338,17 @@ function tripal_feature_load_gff3_featureloc($feature, $organism, $landmark, $fm
   if (!$exists) {
 
     // this feature location is new so add it
-    if (!$phase) {
-      $phase = 'NULL';
-    }
     if (strcmp($is_fmin_partial, 'f')==0 or !$is_fmin_partial) {
-      $is_fmin_partial = 'false';
+      $is_fmin_partial = 'FALSE';
     }
     elseif (strcmp($is_fmin_partial, 't')==0 or $is_fmin_partial = 1) {
-      $is_fmin_partial = 'true';
+      $is_fmin_partial = 'TRUE';
     }
     if (strcmp($is_fmax_partial, 'f')==0 or !$is_fmax_partial) {
-      $is_fmax_partial = 'false';
+      $is_fmax_partial = 'FALSE';
     }
     elseif (strcmp($is_fmax_partial, 't')==0 or $is_fmax_partial = 1) {
-      $is_fmax_partial = 'true';
+      $is_fmax_partial = 'TRUE';
     }
     fwrite($log, "   Adding featureloc $srcfeature->uniquename fmin: $fmin (is_partial: $is_fmin_partial), fmax: $fmax (is_partial: $is_fmin_partial), strand: $strand, phase: $phase, rank: $rank\n");
     $values = array(
@@ -1370,11 +1359,13 @@ function tripal_feature_load_gff3_featureloc($feature, $organism, $landmark, $fm
        'fmax'            => $fmax,
        'is_fmax_partial' => $is_fmax_partial,
        'strand'          => $strand,
-       'phase'           => $phase,
        'residue_info'    => $residue_info,
        'locgroup'        => $locgroup,
        'rank'            => $rank 
     );
+    if($phase) {
+      $values['phase'] = $phase;
+    }
     $options = array('statement_name' => 'ins_featureloc_all');
     $success = tripal_core_chado_insert('featureloc', $values, $options);
     if (!$success) {