ソースを参照

Fixed memory leaks in OBO loader.

spficklin 12 年 前
コミット
8098c77257

+ 2 - 2
tripal_core/tripal_core.module

@@ -24,7 +24,7 @@ require_once "api/tripal_core.ahah.inc";
  * @ingroup tripal_core
  */
 function tripal_core_init() {
-  global $base_url;  
+  global $base_url;     
   
   // we need to declare here the persistent_chado global variable
   global $persistent_chado;
@@ -32,7 +32,7 @@ function tripal_core_init() {
   $persistent_chado = NULL;
   $prepared_statements = array();
   
-  
+  // add javascript files 
   drupal_add_js(drupal_get_path('theme', 'tripal') . '/js/tripal.ahah.js');
   
   // create the 'tripal' controlled volcabulary in chado but only if it doesn't already exist, and

+ 60 - 16
tripal_cv/api/tripal_cv.api.inc

@@ -336,32 +336,39 @@ function tripal_cv_add_cv($name, $definition) {
  */
 function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship = 0, 
   $update = 1, $dbname = 'internal') {
-   
-  $connection = tripal_db_persistent_chado();
-    
+      
   // get the term properties
   $id = $term['id'];
-  if (isset($term['name'])) {
+  $name = '';
+  $cvname = '';
+  $definition = '';
+  $is_obsolete = 0;
+  $accession = '';
+  
+  if (array_key_exists('name', $term)) {
     $name = $term['name'];  
   }
   else {
     $name = $id;  
-  }  
-  if (isset($term['namespace'])) {
+  }
+
+  if (array_key_exists('namespace', $term)) {
     $cvname = $term['namespace'];
   } 
   else {
     $cvname = $defaultcv;
   }
-  if (isset($term['def'])) {
+  if (array_key_exists('def', $term)) {
     $definition = preg_replace('/^\"(.*)\"/', '\1', $term['def']);
   }
   else {
     $definition = '';
   }
-  $is_obsolete = $term['is_obsolete'] ? $term['is_obsolete'] : 0;
-  if (strcmp($is_obsolete, 'true') == 0) {
-    $is_obsolete = 1;
+  if (array_key_exists('is_obsolete', $term)) {
+    $is_obsolete = $term['is_obsolete'];
+    if (strcmp($is_obsolete, 'true') == 0) {
+      $is_obsolete = 1;
+    }
   }  
   if (!$name and !$id) {
     watchdog('tripal_cv', "Cannot find cvterm without 'id' or 'name'", NULL, WATCHDOG_WARNING);
@@ -370,7 +377,6 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
   if (!$id) {
     $id = $name;
   }
-
   
   // get the accession and the database from the cvterm id
   if ($dbname) {
@@ -431,11 +437,11 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
   // the cvterm table has two unique dependencies. We need to check both.
   // first check the (name, cv_id, is_obsolete) constraint
   $values = array(
-     'name' => $name,
-     'is_obsolete' => $is_obsolete,
-     'cv_id' => array(
-       'name' => $cvname,
-     ),
+    'name' => $name,
+    'is_obsolete' => $is_obsolete,
+    'cv_id' => array(
+      'name' => $cvname,
+    ),
   );
   $options = array('statement_name' => 'sel_cvterm_c1');
   $result = tripal_core_chado_select('cvterm', array('*'), $values, $options);
@@ -614,3 +620,41 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
 }
 
 
+/**
+ * Defines the custom tables that the Tripal CV module creates in the
+ * chado schema.
+ *
+ * The only table defined here is 'tripal_obo_temp', which has an 'id',
+ * a 'stanza' and a 'type' column.
+ *
+ * @param $table
+ *   Optional. The name of a single table to describe. When omitted (or
+ *   otherwise empty) the definitions of all custom tables are returned.
+ *
+ * @return
+ *   An array of table definitions keyed by table name. The array is empty
+ *   when $table does not name a table defined by this function.
+ *
+ * @ingroup tripal_cv
+ */
+function tripal_cv_get_custom_tables($table = NULL) {
+
+  // always return an array, even when the requested table is not one of ours
+  $schema = array();
+
+  if (!$table or strcmp($table, 'tripal_obo_temp') == 0) {
+    $schema['tripal_obo_temp'] = array(
+      'table' => 'tripal_obo_temp',
+      'fields' => array(
+        'id' => array(
+          'type' => 'varchar',
+          'length' => '255',
+          'not null' => TRUE,
+        ),
+        'stanza' => array(
+          'type' => 'text',
+          'not null' => TRUE,
+        ),
+        'type' => array(
+          'type' => 'varchar',
+          'length' => '50',
+          'not null' => TRUE,
+        ),
+      ),
+      'indexes' => array(
+        // each index needs its own key: a repeated array key silently
+        // replaces the earlier entry, which would drop the index on 'id'
+        'tripal_obo_temp_idx0' => array('id'),
+        'tripal_obo_temp_idx1' => array('type'),
+      ),
+      'unique keys' => array(
+        'tripal_obo_temp_uq0' => array('id'),
+      ),
+    );
+  }
+  return $schema;
+}

+ 174 - 126
tripal_cv/includes/obo_loader.inc

@@ -127,15 +127,37 @@ function tripal_cv_load_obo_add_ref($name, $path) {
  */
 function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   
-  // we need to get a persistent connection.  If one exists this function
-  // will not recreate it, but if not it will create one and store it in
-  // a Drupal variable for reuse later.
-  $connection = tripal_db_persistent_chado();
-
   $header = array();
-  $obo = array();
+    
+  // make sure our temporary table exists
+  $ret = array(); 
+  if (!db_table_exists('tripal_obo_temp')) { 
+    $schema = tripal_cv_get_custom_tables('tripal_obo_temp');  
+    $success = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
+    if (!$success) {
+      watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR); 
+      return;
+    } 
+  }
+  // empty the temp table
+  $sql = "DELETE FROM tripal_obo_temp";
+  chado_query($sql);
 
-  print "Opening File $file\n";  
+  // get a persistent connection
+  $connection = tripal_db_persistent_chado();
+  if (!$connection) {
+     print "A persistant connection was not obtained. Loading will be slow\n";
+  }
+          
+  // if we cannot get a connection then let the user know the loading will be slow
+  tripal_db_start_transaction();
+  if ($connection) {
+     print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
+           "If the load fails or is terminated prematurely then the entire set of \n" .
+           "insertions/updates is rolled back and will not be found in the database\n\n";
+  }
+
+  print "Step 1: Preloading File $file\n";  
 
   // make sure we have an 'internal' and a '_global' database
   if (!tripal_db_add_db('internal')) {
@@ -146,7 +168,7 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   }
 
   // parse the obo file
-  $default_db = tripal_cv_obo_parse($file, $obo, $header);
+  $default_db = tripal_cv_obo_parse($file, $header, $jobid);
 
   // add the CV for this ontology to the database
   $defaultcv = tripal_cv_add_cv($header['default-namespace'][0], '');
@@ -156,18 +178,24 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   $newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
 
   // add any typedefs to the vocabulary first
-  
-  $typedefs = $obo['Typedef'];
-  foreach ($typedefs as $typedef) {
-    tripal_cv_obo_process_term($typedef, $defaultcv->name, $obo, 1, $newcvs, $default_db);
+  $sql = "
+    SELECT * FROM tripal_obo_temp
+    WHERE type = 'Typedef' 
+  ";
+  $typedefs = chado_query($sql);
+  while ($typedef = db_fetch_object($typedefs)) {
+    $term = unserialize(base64_decode($typedef->stanza));
+    tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
   }
-  
+
   // next add terms to the vocabulary
-  $terms = $obo['Term'];
-  if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
+  print "\nStep 2: Loading terms...\n";  
+  if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
     tripal_cv_obo_quiterror('Cannot add terms from this ontology');
   }
 
+  // transaction is complete
+  tripal_db_commit_transaction();
   return;
 }
 
@@ -186,37 +214,36 @@ function tripal_cv_obo_quiterror($message) {
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
+function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
 
   $i = 0;
-  $count = count($terms);
-  
-  // if the number of terms is zero then simply return.
-  // this can happen when an OBO file simply has typedef
-  // entries and no actual terms.
-  if($count == 0) {
-    return 1;
-  }
+
+  // iterate through each term from the OBO file and add it
+  $sql = "
+    SELECT * FROM tripal_obo_temp
+    WHERE type = 'Term' 
+    ORDER BY id
+  ";
+  $terms = chado_query($sql);
+  $count = pg_num_rows($terms);
   
   // calculate the interval for updates
   $interval = intval($count * 0.0001);
   if ($interval < 1) {
     $interval = 1;
   }
-
-  // iterate through each term from the OBO file and add it
-  print "Loading terms...\n";
-  foreach ($terms as $term) {
-
-    // update the job status every 1% terms
+  while($t = db_fetch_object($terms)) {
+    $term = unserialize(base64_decode($t->stanza));
+    
+    // update the job status every interval
     if ($jobid and $i % $interval == 0) {
-      $complete = ($i / $count) * 100;
-      tripal_job_set_progress($jobid, intval($complete)); 
-      printf("%d of %d records. (%0.2f%%) memory: %s bytes\r", $i, $count, $complete, number_format(memory_get_usage()));                                                             
+      $complete = ($i / $count) * 50;
+      tripal_job_set_progress($jobid + 50, intval($complete)); 
+      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));                                                             
     }                                 
     
     // add/update this term
-    if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs, $default_db)) {
+    if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
       tripal_cv_obo_quiterror("Failed to process terms from the ontology");
     }
 
@@ -225,9 +252,9 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
   
   // set the final status
   if ($jobid) {
-    $complete = ($i / $count) * 100;
-    tripal_job_set_progress($jobid, intval($complete)); 
-    printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
+    $complete = ($i / $count) * 50;
+    tripal_job_set_progress($jobid + 50, intval($complete)); 
+    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
   }                                                             
   
   return 1;
@@ -237,39 +264,41 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {
+function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
+ 
   // construct the term array for sending to the tripal_cv_add_cvterm function
   // for adding a new cvterm
   $t = array(); 
-  $t['id']            = $term['id'][0];
-  $t['name']          = $term['name'][0];
-  $t['def']           = $term['def'][0];
-  if (isset($term['subset'])) {
-    $t['subset']      = $term['subset'][0];  
+  $t['id'] = $term['id'][0];
+  $t['name'] = $term['name'][0];
+  if (array_key_exists('def', $term)) {
+    $t['def'] = $term['def'][0];
+  }
+  if (array_key_exists('subset', $term)) {
+    $t['subset'] = $term['subset'][0];  
   }  
-  if (isset($term['namespace'])) {
-    $t['namespace']   = $term['namespace'][0];
+  if (array_key_exists('namespace', $term)) {
+    $t['namespace'] = $term['namespace'][0];
   }
-  if (isset($term['is_obsolete'])) {
+  if (array_key_exists('is_obsolete', $term)) {
     $t['is_obsolete'] = $term['is_obsolete'][0];
   } 
   
   // add the cvterm
-  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
+  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db); 
   if (!$cvterm) {
     tripal_cv_obo_quiterror("Cannot add the term " . $term['id']);
-  }
-  
-  
-  if (isset($term['namespace'])) {
+  }  
+ 
+  if (array_key_exists('namespace', $term)) {
     $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
   }
   
   // now handle other properites
-  if (isset($term['is_anonymous'])) {
+  if (array_key_exists('is_anonymous', $term)) {
     //print "WARNING: unhandled tag: is_anonymous\n";
   }
-  if (isset($term['alt_id'])) {
+  if (array_key_exists('alt_id', $term)) {
     foreach ($term['alt_id'] as $alt_id) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
         tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
@@ -277,11 +306,11 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
     }
   }
   
-  if (isset($term['subset'])) {
+  if (array_key_exists('subset', $term)) {
     //print "WARNING: unhandled tag: subset\n";
   }
   // add synonyms for this cvterm
-  if (isset($term['synonym'])) {    
+  if (array_key_exists('synonym', $term)) {    
     if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
       tripal_cv_obo_quiterror("Cannot add synonyms");
     }
@@ -289,20 +318,20 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
 
   // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
   // types to be of the v1.2 standard
-  if (isset($term['exact_synonym']) or isset($term['narrow_synonym']) or isset($term['broad_synonym'])) {
-    if (isset($term['exact_synonym'])) {
+  if (array_key_exists('exact_synonym', $term) or array_key_exists('narrow_synonym', $term) or array_key_exists('broad_synonym', $term)) {
+    if (array_key_exists('exact_synonym', $term)) {
       foreach ($term['exact_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 EXACT $2', $synonym);
         $term['synonym'][] = $new;
       }
     }
-    if (isset($term['narrow_synonym'])) {
+    if (array_key_exists('narrow_synonym', $term)) {
       foreach ($term['narrow_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 NARROW $2', $synonym);
         $term['synonym'][] = $new;
       }
     }
-    if (isset($term['broad_synonym'])) {
+    if (array_key_exists('broad_synonym', $term)) {
       foreach ($term['broad_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 BROAD $2', $synonym);
         $term['synonym'][] = $new;
@@ -315,7 +344,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
   }
   
   // add the comment to the cvtermprop table
-  if (isset($term['comment'])) {
+  if (array_key_exists('comment', $term)) {
     $comments = $term['comment'];
     $j = 0;
     foreach ($comments as $comment) {
@@ -325,9 +354,9 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
       $j++;
     }
   }
-  
+
   // add any other external dbxrefs
-  if (isset($term['xref'])) {
+  if (array_key_exists('xref', $term)) {
     foreach ($term['xref'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
@@ -335,14 +364,14 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
     }
   }
   
-  if (isset($term['xref_analog'])) {
+  if (array_key_exists('xref_analog', $term)) {
     foreach ($term['xref_analog'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
       }
     }
   }
-  if (isset($term['xref_unk'])) {
+  if (array_key_exists('xref_unk', $term)) {
     foreach ($term['xref_unk'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
@@ -351,42 +380,42 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
   }
 
   // add is_a relationships for this cvterm
-  if (isset($term['is_a'])) {
+  if (array_key_exists('is_a', $term)) {
     foreach ($term['is_a'] as $is_a) {
-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship, $default_db)) {
+      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
         tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
       }
     }
   }
-  
-  if (isset($term['intersection_of'])) {
+
+  if (array_key_exists('intersection_of', $term)) {
     //print "WARNING: unhandled tag: intersection_of\n";
   }
-  if (isset($term['union_of'])) {
+  if (array_key_exists('union_of', $term)) {
     //print "WARNING: unhandled tag: union_on\n";
   }
-  if (isset($term['disjoint_from'])) {
+  if (array_key_exists('disjoint_from', $term)) {
     //print "WARNING: unhandled tag: disjoint_from\n";
   }
-  if (isset($term['relationship'])) {
+  if (array_key_exists('relationship', $term)) {
     foreach ($term['relationship'] as $value) {
       $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
       $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship, $default_db)) {
+      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
         tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
       }
     }
   }
-  if (isset($term['replaced_by'])) {
+  if (array_key_exists('replaced_by', $term)) {
    //print "WARNING: unhandled tag: replaced_by\n";
   }
-  if (isset($term['consider'])) {
+  if (array_key_exists('consider', $term)) {
     //print "WARNING: unhandled tag: consider\n";
   }
-  if (isset($term['use_term'])) {
+  if (array_key_exists('use_term', $term)) {
     //print "WARNING: unhandled tag: user_term\n";
   }
-  if (isset($term['builtin'])) {
+  if (array_key_exists('builtin', $term)) {
     //print "WARNING: unhandled tag: builtin\n";
   }
   return 1;
@@ -397,7 +426,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, 
+function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, 
   $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {
 
   // make sure the relationship cvterm exists
@@ -425,7 +454,7 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
   }
 
   // get the object term
-  $oterm = tripal_cv_obo_get_term($obo, $objname);
+  $oterm = tripal_cv_obo_get_term($objname);
   if (!$oterm) {
     tripal_cv_obo_quiterror("Could not find object term $objname\n");
   }
@@ -433,14 +462,16 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
   $objterm = array(); 
   $objterm['id']            = $oterm['id'][0];
   $objterm['name']          = $oterm['name'][0];
-  $objterm['def']           = $oterm['def'][0];
-  if (isset($oterm['subset'])) {
+  if (array_key_exists('def', $oterm)) {
+    $objterm['def']           = $oterm['def'][0];
+  }
+  if (array_key_exists('subset', $oterm)) {
     $objterm['subset']      = $oterm['subset'][0];  
   }  
-  if (isset($oterm['namespace'])) {
+  if (array_key_exists('namespace', $oterm)) {
     $objterm['namespace']   = $oterm['namespace'][0];
   }
-  if (isset($oterm['is_obsolete'])) {
+  if (array_key_exists('is_obsolete', $oterm)) {
     $objterm['is_obsolete'] = $oterm['is_obsolete'][0];
   }
   $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);  
@@ -474,26 +505,14 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_get_term($obo, $id) {
-  /*
-  foreach ($obo as $type) {
-    foreach ($type as $term) {
-      $accession = $term['id'][0];
-      if (strcmp($accession, $id)==0) {
-        return $term;
-      }
-    }
-  } */
-  // iterate through each of the types in the
-  // obo file (parsed into memory) and look
-  // for this term.  If found, return it.
-  foreach ($obo as $type) {
-    if (array_key_exists($id, $type)) {
-      return $type[$id];
-    }
+function tripal_cv_obo_get_term($id) {
+  // look the term up by its 'id' in the tripal_obo_temp table, requesting
+  // only the 'stanza' column
+  $values = array('id' => $id);
+  $options = array('statement_name' => 'sel_tripalobotemp_id');
+  $result = tripal_core_chado_select('tripal_obo_temp', array('stanza'), $values, $options);
+  // nothing was returned for this id, so report the term as not found
+  // NOTE(review): this assumes tripal_core_chado_select() always returns
+  // something countable; confirm what it returns when the query fails
+  if (count($result) == 0) {
+    return FALSE;
   }
-
-  return FALSE;
+  // stanzas are written by tripal_cv_obo_parse() as
+  // base64_encode(serialize($stanza)), so undo both steps to get the
+  // stanza array back
+  return unserialize(base64_decode($result[0]->stanza));
 }
 
 /**
@@ -506,7 +525,7 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
   $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
 
   // now add the synonyms
-  if (isset($term['synonym'])) {
+  if (array_key_exists('synonym', $term)) {
     foreach ($term['synonym'] as $synonym) {
       
       // separate out the synonym definition and the synonym type
@@ -593,20 +612,41 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
-  $i = 0;
+function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
   $in_header = 1;
   $stanza = array();
   $default_db = '_global';
+  $line_num = 0;
+  $num_read = 0;
+  $intv_read = 0;
+  
+  $filesize = filesize($obo_file); 
+  $interval = intval($filesize * 0.01);
+  if ($interval < 1) {
+    $interval = 1;
+  } 
 
   // iterate through the lines in the OBO file and parse the stanzas
   $fh = fopen($obo_file, 'r');
   while ($line = fgets($fh)) {
-    $i++;
-
+    
+    $line_num++;
+    $size = drupal_strlen($line);
+    $num_read += $size;
+    $intv_read += $size; 
+    $line = trim($line);      
+
+    // update the job status every 1% features
+    if ($jobid and $intv_read >= $interval) {            
+      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
+      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
+      tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
+      $intv_read = 0;      
+    }
+    
     // remove newlines
     $line = rtrim($line);
-
+        
     // remove any special characters that may be hiding
     $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
 
@@ -618,19 +658,25 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
     //remove comments from end of lines
     $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);  // TODO: if the explamation is escaped
 
-    if (preg_match('/^\s*\[/', $line)) {  // at the first stanza we're out of header
+    // at the first stanza we're out of header
+    if (preg_match('/^\s*\[/', $line)) {  
       $in_header = 0;
+
       // store the stanza we just finished reading
       if (sizeof($stanza) > 0) {
-        if (!isset($obo[$type])) {
-          $obo[$type] = array();
-        }
-        if (!isset($obo[$type][$stanza['id'][0]])) {
-          $obo[$type][$stanza['id'][0]] = $stanza;
-        }
-        else {
-          array_merge($obo[$type][$stanza['id'][0]], $stanza);
+        // add the term to the temp table
+        $values = array(
+          'id' => $stanza['id'][0],
+          'stanza' => base64_encode(serialize($stanza)),
+          'type' => $type,
+        );
+        $options = array('statement_name' => 'ins_tripalobotemp_all');
+        $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
+        if (!$success) {
+          watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
+          exit;
         }
+        
       }
       // get the stanza type:  Term, Typedef or Instance
       $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
@@ -654,13 +700,13 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
     $tag = preg_replace("/\|-\|-\|/", "\:", $tag); // return the escaped colon
     $value = preg_replace("/\|-\|-\|/", "\:", $value);
     if ($in_header) {
-      if (!isset($header[$tag])) {
+      if (!array_key_exists($tag, $header)) {
         $header[$tag] = array();
       }
       $header[$tag][] = $value;
     }
     else {
-      if (!isset($stanza[$tag])) {
+      if (!array_key_exists($tag, $stanza)) {
         $stanza[$tag] = array();
       }
       $stanza[$tag][] = $value;
@@ -668,14 +714,16 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
   }
   // now add the last term in the file
   if (sizeof($stanza) > 0) {
-    if (!isset($obo[$type])) {
-      $obo[$type] = array();
-    }
-    if (!isset($obo[$type][$stanza['id'][0]])) {
-      $obo[$type][$stanza['id'][0]] = $stanza;
-    }
-    else {
-      array_merge($obo[$type][$stanza['id'][0]], $stanza);
+    $values = array(
+      'id' => $stanza['id'][0],
+      'stanza' => base64_encode(serialize($stanza)),
+      'type' => $type,
+    );
+    $options = array('statement_name' => 'ins_tripalobotemp_all');
+    tripal_core_chado_insert('tripal_obo_temp', $values, $options);
+    if (!$success) {
+      watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
+      exit;
     }
   }
   return $default_db;

+ 3 - 2
tripal_feature/includes/gff_loader.inc

@@ -326,8 +326,9 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   while ($line = fgets($fh)) {
 
     $line_num++;
-    $num_read += drupal_strlen($line);   
-    $intv_read += $num_read; 
+    $size = drupal_strlen($line);
+    $num_read += $size;
+    $intv_read += $size; 
 
     // update the job status every 1% features
     if ($job and $intv_read >= $interval) {