Browse Source

Merge branch '6.x-1.x' of git.drupal.org:sandbox/spficklin/1337878 into 6.x-1.x

Lacey Sanderson 12 years ago
parent
commit
7eadda2e68

+ 2 - 2
tripal_core/tripal_core.module

@@ -24,7 +24,7 @@ require_once "api/tripal_core.ahah.inc";
  * @ingroup tripal_core
  */
 function tripal_core_init() {
-  global $base_url;  
+  global $base_url;     
   
   // we need to declare here the persistent_chado global variable
   global $persistent_chado;
@@ -32,7 +32,7 @@ function tripal_core_init() {
   $persistent_chado = NULL;
   $prepared_statements = array();
   
-  
+  // add javascript files 
   drupal_add_js(drupal_get_path('theme', 'tripal') . '/js/tripal.ahah.js');
   
   // create the 'tripal' controlled vocabulary in chado but only if it doesn't already exist, and

+ 60 - 16
tripal_cv/api/tripal_cv.api.inc

@@ -336,32 +336,39 @@ function tripal_cv_add_cv($name, $definition) {
  */
 function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship = 0, 
   $update = 1, $dbname = 'internal') {
-   
-  $connection = tripal_db_persistent_chado();
-    
+      
   // get the term properties
   $id = $term['id'];
-  if (isset($term['name'])) {
+  $name = '';
+  $cvname = '';
+  $definition = '';
+  $is_obsolete = 0;
+  $accession = '';
+  
+  if (array_key_exists('name', $term)) {
     $name = $term['name'];  
   }
   else {
     $name = $id;  
-  }  
-  if (isset($term['namespace'])) {
+  }
+
+  if (array_key_exists('namespace', $term)) {
     $cvname = $term['namespace'];
   } 
   else {
     $cvname = $defaultcv;
   }
-  if (isset($term['def'])) {
+  if (array_key_exists('def', $term)) {
     $definition = preg_replace('/^\"(.*)\"/', '\1', $term['def']);
   }
   else {
     $definition = '';
   }
-  $is_obsolete = $term['is_obsolete'] ? $term['is_obsolete'] : 0;
-  if (strcmp($is_obsolete, 'true') == 0) {
-    $is_obsolete = 1;
+  if (array_key_exists('is_obsolete', $term)) {
+    $is_obsolete = $term['is_obsolete'];
+    if (strcmp($is_obsolete, 'true') == 0) {
+      $is_obsolete = 1;
+    }
   }  
   if (!$name and !$id) {
     watchdog('tripal_cv', "Cannot find cvterm without 'id' or 'name'", NULL, WATCHDOG_WARNING);
@@ -370,7 +377,6 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
   if (!$id) {
     $id = $name;
   }
-
   
   // get the accession and the database from the cvterm id
   if ($dbname) {
@@ -431,11 +437,11 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
   // the cvterm table has two unique dependencies. We need to check both.
   // first check the (name, cv_id, is_obsolete) constraint
   $values = array(
-     'name' => $name,
-     'is_obsolete' => $is_obsolete,
-     'cv_id' => array(
-       'name' => $cvname,
-     ),
+    'name' => $name,
+    'is_obsolete' => $is_obsolete,
+    'cv_id' => array(
+      'name' => $cvname,
+    ),
   );
   $options = array('statement_name' => 'sel_cvterm_c1');
   $result = tripal_core_chado_select('cvterm', array('*'), $values, $options);
@@ -614,3 +620,41 @@ function tripal_cv_add_cvterm($term, $defaultcv = '_global', $is_relationship =
 }
 
 
+/**
+ * Returns the definitions of the custom tables this module creates in the
+ * chado schema: all of them when $table is empty, otherwise only the named one.
+ *
+ * @ingroup tripal_cv
+ */
+function tripal_cv_get_custom_tables($table = NULL) {
+  $schema = array();  // always return an array, even for an unrecognized $table
+  if (!$table or strcmp($table, 'tripal_obo_temp')==0) {
+    $schema['tripal_obo_temp'] = array(
+      'table' => 'tripal_obo_temp',
+      'fields' => array(
+        'id' => array(
+          'type' => 'varchar',
+          'length' => '255',
+          'not null' => TRUE,
+        ),
+        'stanza' => array(
+          'type' => 'text',
+          'not null' => TRUE,
+        ),
+        'type' => array(
+          'type' => 'varchar',
+          'length' => '50',
+          'not null' => TRUE,
+        ),
+      ),
+      'indexes' => array(
+        'tripal_obo_temp_idx0' => array('id'),
+        'tripal_obo_temp_idx1' => array('type'),  // distinct name: a repeated key would replace the 'id' index
+      ),
+      'unique keys' => array(
+        'tripal_obo_temp_uq0' => array('id'),
+      ),
+    );
+  }
+  return $schema;
+}

+ 174 - 126
tripal_cv/includes/obo_loader.inc

@@ -127,15 +127,37 @@ function tripal_cv_load_obo_add_ref($name, $path) {
  */
 function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   
-  // we need to get a persistent connection.  If one exists this function
-  // will not recreate it, but if not it will create one and store it in
-  // a Drupal variable for reuse later.
-  $connection = tripal_db_persistent_chado();
-
   $header = array();
-  $obo = array();
+    
+  // make sure our temporary table exists
+  $ret = array(); 
+  if (!db_table_exists('tripal_obo_temp')) { 
+    $schema = tripal_cv_get_custom_tables('tripal_obo_temp');  
+    $success = tripal_core_create_custom_table($ret, 'tripal_obo_temp', $schema['tripal_obo_temp']);
+    if (!$success) {
+      watchdog('T_obo_loader', "Cannot create temporary loading table", array(), WATCHDOG_ERROR); 
+      return;
+    } 
+  }
+  // empty the temp table
+  $sql = "DELETE FROM tripal_obo_temp";
+  chado_query($sql);
 
-  print "Opening File $file\n";  
+  // get a persistent connection
+  $connection = tripal_db_persistent_chado();
+  if (!$connection) {
+     print "A persistant connection was not obtained. Loading will be slow\n";
+  }
+          
+  // start the transaction; if we have a persistent connection, explain the rollback behavior
+  tripal_db_start_transaction();
+  if ($connection) {
+     print "\nNOTE: Loading of this OBO file is performed using a database transaction. \n" .
+           "If the load fails or is terminated prematurely then the entire set of \n" .
+           "insertions/updates is rolled back and will not be found in the database\n\n";
+  }
+
+  print "Step 1: Preloading File $file\n";  
 
   // make sure we have an 'internal' and a '_global' database
   if (!tripal_db_add_db('internal')) {
@@ -146,7 +168,7 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   }
 
   // parse the obo file
-  $default_db = tripal_cv_obo_parse($file, $obo, $header);
+  $default_db = tripal_cv_obo_parse($file, $header, $jobid);
 
   // add the CV for this ontology to the database
   $defaultcv = tripal_cv_add_cv($header['default-namespace'][0], '');
@@ -156,18 +178,24 @@ function tripal_cv_load_obo_v1_2($file, $jobid = NULL, &$newcvs) {
   $newcvs[$header['default-namespace'][0]] = $defaultcv->cv_id;
 
   // add any typedefs to the vocabulary first
-  
-  $typedefs = $obo['Typedef'];
-  foreach ($typedefs as $typedef) {
-    tripal_cv_obo_process_term($typedef, $defaultcv->name, $obo, 1, $newcvs, $default_db);
+  $sql = "
+    SELECT * FROM tripal_obo_temp
+    WHERE type = 'Typedef' 
+  ";
+  $typedefs = chado_query($sql);
+  while ($typedef = db_fetch_object($typedefs)) {
+    $term = unserialize(base64_decode($typedef->stanza));
+    tripal_cv_obo_process_term($term, $defaultcv->name, 1, $newcvs, $default_db);
   }
-  
+
   // next add terms to the vocabulary
-  $terms = $obo['Term'];
-  if (!tripal_cv_obo_process_terms($terms, $defaultcv->name, $obo, $jobid, $newcvs, $default_db)) {
+  print "\nStep 2: Loading terms...\n";  
+  if (!tripal_cv_obo_process_terms($defaultcv->name, $jobid, $newcvs, $default_db)) {
     tripal_cv_obo_quiterror('Cannot add terms from this ontology');
   }
 
+  // transaction is complete
+  tripal_db_commit_transaction();
   return;
 }
 
@@ -186,37 +214,36 @@ function tripal_cv_obo_quiterror($message) {
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$newcvs, $default_db) {
+function tripal_cv_obo_process_terms($defaultcv, $jobid = NULL, &$newcvs, $default_db) {
 
   $i = 0;
-  $count = count($terms);
-  
-  // if the number of terms is zero then simply return.
-  // this can happen when an OBO file simply has typedef
-  // entries and no actual terms.
-  if($count == 0) {
-    return 1;
-  }
+
+  // iterate through each term from the OBO file and add it
+  $sql = "
+    SELECT * FROM tripal_obo_temp
+    WHERE type = 'Term' 
+    ORDER BY id
+  ";
+  $terms = chado_query($sql);
+  $count = pg_num_rows($terms);
   
   // calculate the interval for updates
   $interval = intval($count * 0.0001);
   if ($interval < 1) {
     $interval = 1;
   }
-
-  // iterate through each term from the OBO file and add it
-  print "Loading terms...\n";
-  foreach ($terms as $term) {
-
-    // update the job status every 1% terms
+  while($t = db_fetch_object($terms)) {
+    $term = unserialize(base64_decode($t->stanza));
+    
+    // update the job status every interval
     if ($jobid and $i % $interval == 0) {
-      $complete = ($i / $count) * 100;
-      tripal_job_set_progress($jobid, intval($complete)); 
-      printf("%d of %d records. (%0.2f%%) memory: %s bytes\r", $i, $count, $complete, number_format(memory_get_usage()));                                                             
+      $complete = ($i / $count) * 50;
+      tripal_job_set_progress($jobid + 50, intval($complete)); 
+      printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));                                                             
     }                                 
     
     // add/update this term
-    if (!tripal_cv_obo_process_term($term, $defaultcv, $obo, 0, $newcvs, $default_db)) {
+    if (!tripal_cv_obo_process_term($term, $defaultcv, 0, $newcvs, $default_db)) {
       tripal_cv_obo_quiterror("Failed to process terms from the ontology");
     }
 
@@ -225,9 +252,9 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
   
   // set the final status
   if ($jobid) {
-    $complete = ($i / $count) * 100;
-    tripal_job_set_progress($jobid, intval($complete)); 
-    printf("%d of %d records. (%0.2f%%) memory: %d\r", $i, $count, $complete, memory_get_usage());
+    $complete = ($i / $count) * 50;
+    tripal_job_set_progress($jobid + 50, intval($complete)); 
+    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 2, number_format(memory_get_usage()));
   }                                                             
   
   return 1;
@@ -237,39 +264,41 @@ function tripal_cv_obo_process_terms($terms, $defaultcv, $obo, $jobid = NULL, &$
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship = 0, &$newcvs, $default_db) {
+function tripal_cv_obo_process_term($term, $defaultcv, $is_relationship = 0, &$newcvs, $default_db) {
+ 
   // construct the term array for sending to the tripal_cv_add_cvterm function
   // for adding a new cvterm
   $t = array(); 
-  $t['id']            = $term['id'][0];
-  $t['name']          = $term['name'][0];
-  $t['def']           = $term['def'][0];
-  if (isset($term['subset'])) {
-    $t['subset']      = $term['subset'][0];  
+  $t['id'] = $term['id'][0];
+  $t['name'] = $term['name'][0];
+  if (array_key_exists('def', $term)) {
+    $t['def'] = $term['def'][0];
+  }
+  if (array_key_exists('subset', $term)) {
+    $t['subset'] = $term['subset'][0];  
   }  
-  if (isset($term['namespace'])) {
-    $t['namespace']   = $term['namespace'][0];
+  if (array_key_exists('namespace', $term)) {
+    $t['namespace'] = $term['namespace'][0];
   }
-  if (isset($term['is_obsolete'])) {
+  if (array_key_exists('is_obsolete', $term)) {
     $t['is_obsolete'] = $term['is_obsolete'][0];
   } 
   
   // add the cvterm
-  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db);
+  $cvterm = tripal_cv_add_cvterm($t, $defaultcv, $is_relationship, 1, $default_db); 
   if (!$cvterm) {
     tripal_cv_obo_quiterror("Cannot add the term " . $term['id']);
-  }
-  
-  
-  if (isset($term['namespace'])) {
+  }  
+ 
+  if (array_key_exists('namespace', $term)) {
     $newcvs[$term['namespace'][0]] = $cvterm->cv_id;
   }
   
   // now handle other properties
-  if (isset($term['is_anonymous'])) {
+  if (array_key_exists('is_anonymous', $term)) {
     //print "WARNING: unhandled tag: is_anonymous\n";
   }
-  if (isset($term['alt_id'])) {
+  if (array_key_exists('alt_id', $term)) {
     foreach ($term['alt_id'] as $alt_id) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $alt_id)) {
         tripal_cv_obo_quiterror("Cannot add alternate id $alt_id");
@@ -277,11 +306,11 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
     }
   }
   
-  if (isset($term['subset'])) {
+  if (array_key_exists('subset', $term)) {
     //print "WARNING: unhandled tag: subset\n";
   }
   // add synonyms for this cvterm
-  if (isset($term['synonym'])) {    
+  if (array_key_exists('synonym', $term)) {    
     if (!tripal_cv_obo_add_synonyms($term, $cvterm)) {
       tripal_cv_obo_quiterror("Cannot add synonyms");
     }
@@ -289,20 +318,20 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
 
   // reformat the deprecated 'exact_synonym, narrow_synonym, and broad_synonym'
   // types to be of the v1.2 standard
-  if (isset($term['exact_synonym']) or isset($term['narrow_synonym']) or isset($term['broad_synonym'])) {
-    if (isset($term['exact_synonym'])) {
+  if (array_key_exists('exact_synonym', $term) or array_key_exists('narrow_synonym', $term) or array_key_exists('broad_synonym', $term)) {
+    if (array_key_exists('exact_synonym', $term)) {
       foreach ($term['exact_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 EXACT $2', $synonym);
         $term['synonym'][] = $new;
       }
     }
-    if (isset($term['narrow_synonym'])) {
+    if (array_key_exists('narrow_synonym', $term)) {
       foreach ($term['narrow_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 NARROW $2', $synonym);
         $term['synonym'][] = $new;
       }
     }
-    if (isset($term['broad_synonym'])) {
+    if (array_key_exists('broad_synonym', $term)) {
       foreach ($term['broad_synonym'] as $synonym) {
         $new = preg_replace('/^\s*(\".+?\")(.*?)$/', '$1 BROAD $2', $synonym);
         $term['synonym'][] = $new;
@@ -315,7 +344,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
   }
   
   // add the comment to the cvtermprop table
-  if (isset($term['comment'])) {
+  if (array_key_exists('comment', $term)) {
     $comments = $term['comment'];
     $j = 0;
     foreach ($comments as $comment) {
@@ -325,9 +354,9 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
       $j++;
     }
   }
-  
+
   // add any other external dbxrefs
-  if (isset($term['xref'])) {
+  if (array_key_exists('xref', $term)) {
     foreach ($term['xref'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
@@ -335,14 +364,14 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
     }
   }
   
-  if (isset($term['xref_analog'])) {
+  if (array_key_exists('xref_analog', $term)) {
     foreach ($term['xref_analog'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
       }
     }
   }
-  if (isset($term['xref_unk'])) {
+  if (array_key_exists('xref_unk', $term)) {
     foreach ($term['xref_unk'] as $xref) {
       if (!tripal_cv_obo_add_cvterm_dbxref($cvterm, $xref)) {
         tripal_cv_obo_quiterror("Cannot add/update cvterm database reference (dbxref).");
@@ -351,42 +380,42 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
   }
 
   // add is_a relationships for this cvterm
-  if (isset($term['is_a'])) {
+  if (array_key_exists('is_a', $term)) {
     foreach ($term['is_a'] as $is_a) {
-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, 'is_a', $is_a, $is_relationship, $default_db)) {
+      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, 'is_a', $is_a, $is_relationship, $default_db)) {
         tripal_cv_obo_quiterror("Cannot add relationship is_a: $is_a");
       }
     }
   }
-  
-  if (isset($term['intersection_of'])) {
+
+  if (array_key_exists('intersection_of', $term)) {
     //print "WARNING: unhandled tag: intersection_of\n";
   }
-  if (isset($term['union_of'])) {
+  if (array_key_exists('union_of', $term)) {
     //print "WARNING: unhandled tag: union_on\n";
   }
-  if (isset($term['disjoint_from'])) {
+  if (array_key_exists('disjoint_from', $term)) {
     //print "WARNING: unhandled tag: disjoint_from\n";
   }
-  if (isset($term['relationship'])) {
+  if (array_key_exists('relationship', $term)) {
     foreach ($term['relationship'] as $value) {
       $rel = preg_replace('/^(.+?)\s.+?$/', '\1', $value);
       $object = preg_replace('/^.+?\s(.+?)$/', '\1', $value);
-      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, $object, $is_relationship, $default_db)) {
+      if (!tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, $object, $is_relationship, $default_db)) {
         tripal_cv_obo_quiterror("Cannot add relationship $rel: $object");
       }
     }
   }
-  if (isset($term['replaced_by'])) {
+  if (array_key_exists('replaced_by', $term)) {
    //print "WARNING: unhandled tag: replaced_by\n";
   }
-  if (isset($term['consider'])) {
+  if (array_key_exists('consider', $term)) {
     //print "WARNING: unhandled tag: consider\n";
   }
-  if (isset($term['use_term'])) {
+  if (array_key_exists('use_term', $term)) {
     //print "WARNING: unhandled tag: user_term\n";
   }
-  if (isset($term['builtin'])) {
+  if (array_key_exists('builtin', $term)) {
     //print "WARNING: unhandled tag: builtin\n";
   }
   return 1;
@@ -397,7 +426,7 @@ function tripal_cv_obo_process_term($term, $defaultcv, $obo, $is_relationship =
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel, 
+function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $rel, 
   $objname, $object_is_relationship = 0, $default_db = 'OBO_REL') {
 
   // make sure the relationship cvterm exists
@@ -425,7 +454,7 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
   }
 
   // get the object term
-  $oterm = tripal_cv_obo_get_term($obo, $objname);
+  $oterm = tripal_cv_obo_get_term($objname);
   if (!$oterm) {
     tripal_cv_obo_quiterror("Could not find object term $objname\n");
   }
@@ -433,14 +462,16 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
   $objterm = array(); 
   $objterm['id']            = $oterm['id'][0];
   $objterm['name']          = $oterm['name'][0];
-  $objterm['def']           = $oterm['def'][0];
-  if (isset($oterm['subset'])) {
+  if (array_key_exists('def', $oterm)) {
+    $objterm['def']           = $oterm['def'][0];
+  }
+  if (array_key_exists('subset', $oterm)) {
     $objterm['subset']      = $oterm['subset'][0];  
   }  
-  if (isset($oterm['namespace'])) {
+  if (array_key_exists('namespace', $oterm)) {
     $objterm['namespace']   = $oterm['namespace'][0];
   }
-  if (isset($oterm['is_obsolete'])) {
+  if (array_key_exists('is_obsolete', $oterm)) {
     $objterm['is_obsolete'] = $oterm['is_obsolete'][0];
   }
   $objcvterm = tripal_cv_add_cvterm($objterm, $defaultcv, $object_is_relationship, 1, $default_db);  
@@ -474,26 +505,14 @@ function tripal_cv_obo_add_relationship($cvterm, $defaultcv, $obo, $rel,
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_get_term($obo, $id) {
-  /*
-  foreach ($obo as $type) {
-    foreach ($type as $term) {
-      $accession = $term['id'][0];
-      if (strcmp($accession, $id)==0) {
-        return $term;
-      }
-    }
-  } */
-  // iterate through each of the types in the
-  // obo file (parsed into memory) and look
-  // for this term.  If found, return it.
-  foreach ($obo as $type) {
-    if (array_key_exists($id, $type)) {
-      return $type[$id];
-    }
+function tripal_cv_obo_get_term($id) {
+  $values = array('id' => $id);  // look the stanza up by its id in the tripal_obo_temp table
+  $options = array('statement_name' => 'sel_tripalobotemp_id');
+  $result = tripal_core_chado_select('tripal_obo_temp', array('stanza'), $values, $options);
+  if (!is_array($result) or count($result) == 0) {  // count() of a non-array is not 0, so check the type first
+    return FALSE;
   }
-
-  return FALSE;
+  return unserialize(base64_decode($result[0]->stanza));  // reverses base64_encode(serialize($stanza))
 }
 
 /**
@@ -506,7 +525,7 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
   $syncv = tripal_cv_add_cv('synonym_type', 'A vocabulary added by the Tripal CV module OBO loader for storing synonym types.');
 
   // now add the synonyms
-  if (isset($term['synonym'])) {
+  if (array_key_exists('synonym', $term)) {
     foreach ($term['synonym'] as $synonym) {
       
       // separate out the synonym definition and the synonym type
@@ -593,20 +612,41 @@ function tripal_cv_obo_add_synonyms($term, $cvterm) {
  *
  * @ingroup tripal_obo_loader
  */
-function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
-  $i = 0;
+function tripal_cv_obo_parse($obo_file, &$header, $jobid) {
   $in_header = 1;
   $stanza = array();
   $default_db = '_global';
+  $line_num = 0;
+  $num_read = 0;
+  $intv_read = 0;
+  
+  $filesize = filesize($obo_file); 
+  $interval = intval($filesize * 0.01);
+  if ($interval < 1) {
+    $interval = 1;
+  } 
 
   // iterate through the lines in the OBO file and parse the stanzas
   $fh = fopen($obo_file, 'r');
   while ($line = fgets($fh)) {
-    $i++;
-
+    
+    $line_num++;
+    $size = drupal_strlen($line);
+    $num_read += $size;
+    $intv_read += $size; 
+    $line = trim($line);      
+
+    // update the job status each time another 1% of the file has been read
+    if ($jobid and $intv_read >= $interval) {            
+      $percent = sprintf("%.2f", ($num_read / $filesize) * 100);
+      print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
+      tripal_job_set_progress($jobid, intval(($num_read / $filesize) * 50));
+      $intv_read = 0;      
+    }
+    
     // remove newlines
     $line = rtrim($line);
-
+        
     // remove any special characters that may be hiding
     $line = preg_replace('/[^(\x20-\x7F)]*/', '', $line);
 
@@ -618,19 +658,25 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
     //remove comments from end of lines
     $line = preg_replace('/^(.*?)\!.*$/', '\1', $line);  // TODO: if the explamation is escaped
 
-    if (preg_match('/^\s*\[/', $line)) {  // at the first stanza we're out of header
+    // at the first stanza we're out of header
+    if (preg_match('/^\s*\[/', $line)) {  
       $in_header = 0;
+
       // store the stanza we just finished reading
       if (sizeof($stanza) > 0) {
-        if (!isset($obo[$type])) {
-          $obo[$type] = array();
-        }
-        if (!isset($obo[$type][$stanza['id'][0]])) {
-          $obo[$type][$stanza['id'][0]] = $stanza;
-        }
-        else {
-          array_merge($obo[$type][$stanza['id'][0]], $stanza);
+        // add the term to the temp table
+        $values = array(
+          'id' => $stanza['id'][0],
+          'stanza' => base64_encode(serialize($stanza)),
+          'type' => $type,
+        );
+        $options = array('statement_name' => 'ins_tripalobotemp_all');
+        $success = tripal_core_chado_insert('tripal_obo_temp', $values, $options);
+        if (!$success) {
+          watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
+          exit;
         }
+        
       }
       // get the stanza type:  Term, Typedef or Instance
       $type = preg_replace('/^\s*\[\s*(.+?)\s*\]\s*$/', '\1', $line);
@@ -654,13 +700,13 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
     $tag = preg_replace("/\|-\|-\|/", "\:", $tag); // return the escaped colon
     $value = preg_replace("/\|-\|-\|/", "\:", $value);
     if ($in_header) {
-      if (!isset($header[$tag])) {
+      if (!array_key_exists($tag, $header)) {
         $header[$tag] = array();
       }
       $header[$tag][] = $value;
     }
     else {
-      if (!isset($stanza[$tag])) {
+      if (!array_key_exists($tag, $stanza)) {
         $stanza[$tag] = array();
       }
       $stanza[$tag][] = $value;
@@ -668,14 +714,16 @@ function tripal_cv_obo_parse($obo_file, &$obo, &$header) {
   }
   // now add the last term in the file
   if (sizeof($stanza) > 0) {
-    if (!isset($obo[$type])) {
-      $obo[$type] = array();
-    }
-    if (!isset($obo[$type][$stanza['id'][0]])) {
-      $obo[$type][$stanza['id'][0]] = $stanza;
-    }
-    else {
-      array_merge($obo[$type][$stanza['id'][0]], $stanza);
+    $values = array(
+      'id' => $stanza['id'][0],
+      'stanza' => base64_encode(serialize($stanza)),
+      'type' => $type,
+    );
+    $options = array('statement_name' => 'ins_tripalobotemp_all');
+    tripal_core_chado_insert('tripal_obo_temp', $values, $options);
+    if (!$success) {
+      watchdog('T_obo_loader', "ERROR: Cannot insert stanza into temporary table.", array(), 'error');
+      exit;
     }
   }
   return $default_db;

+ 239 - 62
tripal_feature/includes/gff_loader.inc

@@ -27,7 +27,6 @@ function tripal_feature_gff3_load_form() {
                            installation (e.g. /sites/default/files/xyz.gff).  The path must be accessible to the
                            server on which this Drupal instance is running.'),
     '#required' => TRUE,
-    '#weight'        => 1
   );
   // get the list of organisms
   $sql = "SELECT * FROM {organism} ORDER BY genus, species";
@@ -44,10 +43,32 @@ function tripal_feature_gff3_load_form() {
     '#required'    => TRUE,
     '#options'     => $organisms,
   );
+  
+  // get the list of analyses
+  $sql = "SELECT * FROM {analysis} ORDER BY name";
+  $org_rset = chado_query($sql);
+  $analyses = array();
+  $analyses[''] = '';
+  while ($analysis = db_fetch_object($org_rset)) {
+    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
+  }
+  $form['analysis_id'] = array(
+   '#title'       => t('Analysis'),
+   '#type'        => t('select'),
+   '#description' => t("Choose the analysis to which these features are associated. 
+       Why specify an analysis for a data load?  All data comes
+       from some place, even if downloaded from Genbank. By specifying
+       analysis details for all data imports it allows an end user to reproduce the
+       data set, but at least indicates the source of the data."),
+   '#required'    => TRUE,
+   '#options'     => $analyses,
+  );
+  
+  
+  
   $form['import_options'] = array(
     '#type' => 'fieldset',
     '#title' => t('Import Options'),
-    '#weight' => 6,
     '#collapsed' => TRUE
   );
   $form['import_options']['use_transaction']= array(
@@ -58,7 +79,6 @@ function tripal_feature_gff3_load_form() {
       the entire datset loaded prior to the failure will be rolled back and will not be available
       in the database.  If this option is unchecked and failure occurs all records up to the point
       of failure will be present in the database.'),
-    '#weight' => 1
   );
   $form['import_options']['add_only']= array(
     '#type' => 'checkbox',
@@ -66,7 +86,6 @@ function tripal_feature_gff3_load_form() {
     '#required' => FALSE,
     '#description' => t('The job will skip features in the GFF file that already
                          exist in the database and import only new features.'),
-    '#weight' => 2
   );
   $form['import_options']['update']= array(
     '#type' => 'checkbox',
@@ -76,7 +95,6 @@ function tripal_feature_gff3_load_form() {
     '#description' => t('Existing features will be updated and new features will be added.  Attributes
                          for a feature that are not present in the GFF but which are present in the
                          database will not be altered.'),
-    '#weight' => 3
   );
   $form['import_options']['refresh']= array(
     '#type' => 'checkbox',
@@ -84,7 +102,6 @@ function tripal_feature_gff3_load_form() {
     '#required' => FALSE,
     '#description' => t('Existing features will be updated and feature properties not
                          present in the GFF file will be removed.'),
-    '#weight' => 4
   );
   $form['import_options']['remove']= array(
     '#type' => 'checkbox',
@@ -92,37 +109,51 @@ function tripal_feature_gff3_load_form() {
     '#required' => FALSE,
     '#description' => t('Features present in the GFF file that exist in the database
                          will be removed rather than imported'),
-    '#weight' => 5
   );
 
-  $form['analysis'] = array(
+  $form['targets'] = array(
     '#type' => 'fieldset',
-    '#title' => t('Analysis Used to Derive Features'),
-    '#weight' => 6,
+    '#title' => t('Targets'),
     '#collapsed' => TRUE
   );
-  $form['analysis']['desc'] = array(
+  $form['targets']['adesc'] = array(
     '#type' => 'markup',
-    '#value' => t("Why specify an analysis for a data load?  All data comes
-       from some place, even if downloaded from Genbank. By specifying
-       analysis details for all data uploads, it allows an end user to reproduce the
-       data set, but at least indicates the source of the data."),
+    '#value' => t("When alignments are represented in the GFF file (e.g. such as 
+       alignments of cDNA sequences to a whole genome, or blast matches), they are
+       represented using two feature types: 'match' (or cDNA_match, EST_match, etc.) 
+       and 'match_part'.  These features may also have a 'Target' attribute to
+       specify the sequence that is being aligned.  
+       However, the organism to which the aligned sequence belongs may not be present in the
+       GFF file.  Here you can specify the organism and feature type of the target sequences.
+       The options here will apply to all targets unless the organism and type are explicity
+       set in the GFF file using the 'target_organism' and 'target_type' attributes."),
   );
-
-  // get the list of analyses
-  $sql = "SELECT * FROM {analysis} ORDER BY name";
-  $org_rset = chado_query($sql);
-  $analyses = array();
-  $analyses[''] = '';
-  while ($analysis = db_fetch_object($org_rset)) {
-    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
-  }
-  $form['analysis']['analysis_id'] = array(
-   '#title'       => t('Analysis'),
-   '#type'        => t('select'),
-   '#description' => t("Choose the analysis to which these features are associated"),
-   '#required'    => TRUE,
-   '#options'     => $analyses,
+  $form['targets']['target_organism_id'] = array(
+    '#title'       => t('Target Organism'),
+    '#type'        => t('select'),
+    '#description' => t("Optional. Choose the organism to which target sequences belong. 
+      Select this only if target sequences belong to a different organism than the 
+      one specified above. And only choose an organism here if all of the target sequences 
+      belong to the same species.  If the targets in the GFF file belong to multiple 
+      different species then the organism must be specified using the 'target_organism=genus,species' 
+      attribute in the GFF file."),
+    '#options'     => $organisms,
+  );
+  $form['targets']['target_type'] = array(
+    '#title'       => t('Target Type'),
+    '#type'        => t('textfield'),
+    '#description' => t("Optional. If the unique name for a target sequence is not unique (e.g. a protein
+       and an mRNA have the same name) then you must specify the type for all targets in the GFF file. If 
+       the targets are of different types then the type must be specified using the 'target_type=type' attribute
+       in the GFF file. This must be a valid Sequence Ontology (SO) term."),
+  );
+  $form['targets']['create_target']= array(
+    '#type' => 'checkbox',
+    '#title' => t('Create Target'),
+    '#required' => FALSE,
+    '#description' => t("If the target feature cannot be found, create one using the organism and type specified above, or
+       using the 'target_organism' and 'target_type' fields specified in the GFF file.  Values specified in the
+       GFF file take precedence over those specified above."),
   );
 
   $form['button'] = array(
@@ -143,6 +174,9 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
 
   $gff_file = $form_state['values']['gff_file'];
   $organism_id = $form_state['values']['organism_id'];
+  $target_organism_id = $form_state['values']['target_organism_id'];
+  $target_type = $form_state['values']['target_type'];
+  $create_target = $form_state['values']['create_target'];
   $add_only = $form_state['values']['add_only'];
   $update   = $form_state['values']['update'];
   $refresh  = $form_state['values']['refresh'];
@@ -169,6 +203,7 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
       ($remove   AND ($update   OR $refresh  OR $add_only))) {
       form_set_error('add_only', t("Please select only one checkbox from the import options section"));
   }
+    
 }
 
 /**
@@ -186,9 +221,14 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
   $remove   = $form_state['values']['remove'];
   $analysis_id = $form_state['values']['analysis_id'];
   $use_transaction   = $form_state['values']['use_transaction'];
-
+  $target_organism_id = $form_state['values']['target_organism_id'];
+  $target_type = $form_state['values']['target_type'];
+  $create_target = $form_state['values']['create_target'];
+  
   $args = array($gff_file, $organism_id, $analysis_id, $add_only, 
-    $update, $refresh, $remove, $use_transaction);
+    $update, $refresh, $remove, $use_transaction, $target_organism_id, 
+    $target_type, $create_target);
+    
   $type = '';
   if ($add_only) {
     $type = 'import only new features';
@@ -216,6 +256,7 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
  */
 function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id, 
   $add_only =0, $update = 0, $refresh = 0, $remove = 0, $use_transaction = 1, 
+  $target_organism_id = NULL, $target_type = NULL,  $create_target = 0, 
   $job = NULL) {  
 
   // make sure our temporary table exists
@@ -293,7 +334,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   $sql = "SELECT * FROM organism WHERE organism_id = %d";
   $organism = db_fetch_object(chado_query($sql, $organism_id));
 
-  $interval = intval($filesize * 0.01);
+  $interval = intval($filesize * 0.0001);
   if ($interval == 0) {
     $interval = 1;
   }
@@ -326,9 +367,10 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   while ($line = fgets($fh)) {
 
     $line_num++;
-    $num_read += drupal_strlen($line);   
-    $intv_read += $num_read; 
-
+    $size = drupal_strlen($line);
+    $num_read += $size;
+    $intv_read += $size; 
+    
     // update the job status every 1% features
     if ($job and $intv_read >= $interval) {
       $intv_read = 0;
@@ -417,7 +459,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
     $attr_is_analysis = 'f';
     $attr_others = '';
     $residues = '';
-    
+
     foreach ($attrs as $attr) {
       $attr = rtrim($attr);
       $attr = ltrim($attr);
@@ -458,7 +500,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
               strcmp($tag_name, 'Target') !=0       and strcmp($tag_name, 'Gap') !=0 and
               strcmp($tag_name, 'Derives_from') !=0 and strcmp($tag_name, 'Note') !=0 and
               strcmp($tag_name, 'Dbxref') !=0       and strcmp($tag_name, 'Ontology_term') !=0 and
-              strcmp($tag_name, 'Is_circular') !=0) {
+              strcmp($tag_name, 'Is_circular') !=0  and strcmp($tag_name, 'target_organism') !=0 and
+              strcmp($tag_name, 'target_type') != 0) {
         foreach ($tags[$tag_name] as $value) {
           $attr_others[$tag_name][] = $value;
         }
@@ -563,7 +606,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           $result = tripal_core_chado_insert('tripal_gff_temp', $values, $options);
           if (!$result) {
             watchdog('T_gff3_loader', "Cound not save record in temporary table, Cannot continue.", array(), WATCHDOG_ERROR);
-            return;
+            exit;
           }
         }
 
@@ -596,6 +639,11 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           // format is: "target_id start end [strand]", where strand is optional and may be "+" or "-"
           $matched = preg_match('/^(.*?)\s+(\d+)\s+(\d+)(\s+[\+|\-])*$/', trim($tags['Target'][0]), $matches);
           
+          // the organism and type of the target may also be specified as an attribute. If so, then get that
+          // information
+          $gff_target_organism = array_key_exists('target_organism', $tags) ? $tags['target_organism'][0] : '';
+          $gff_target_type = array_key_exists('target_type', $tags) ? $tags['target_type'][0] : '';
+          
           // if we have matches and the Target is in the correct format then load the alignment 
           if ($matched) {
             $target_feature = $matches[1]; 
@@ -603,10 +651,10 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
             $end = $matches[3]; 
             // if we have an optional strand, convert it to a numeric value. 
             if ($matches[4]) {
-              if (preg_match('/^+$/', trim($matches[4]))) {
+              if (preg_match('/^\+$/', trim($matches[4]))) {
                 $target_strand = 1;
               }
-              elseif (preg_match('/^-$/', trim($matches[4]))) {
+              elseif (preg_match('/^\-$/', trim($matches[4]))) {
                 $target_strand = -1;
               }
               else {
@@ -624,10 +672,81 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
               $target_fmax = $start;
             }
             
-            #print "Target: $target_feature, $target_fmin-$target_fmax $target_dir\n";
-            tripal_feature_load_gff3_featureloc($feature, $organism,
-              $target_feature, $target_fmin, $target_fmax, $target_strand, $phase, $attr_fmin_partial,
-              $attr_fmax_partial, $attr_residue_info, $attr_locgroup);
+            // default the target organism to be the value passed into the function, but if the GFF
+            // file specifies the target organism then use that instead.
+            $t_organism_id = $target_organism_id;
+            if ($gff_target_organism) {
+              // get the genus and species
+              $success = preg_match('/^(.*?),(.*?)$/', $gff_target_organism, $matches);
+              if ($success) {
+                $values = array(
+                  'genus' => $matches[1],
+                  'species' => $matches[2],
+                );
+                $options = array('statement_name' => 'sel_organism_gesp');
+                $organism = tripal_core_chado_select('organism', array('organism_id'), $values, $options);
+                if (count($organism) == 1) {
+                  $t_organism_id = $organism[0]->organism_id;
+                }
+                else {
+                  watchdog('T_gff3_loader', "Cannot find organism for target %target.", 
+                    array('%target' => $gff_target_organism), WATCHDOG_WARNING);
+                  $t_organism_id = '';                                   
+                }
+              }
+              else {
+                watchdog('T_gff3_loader', "The target_organism attribute is improperly formatted: %target. 
+                  It should be target_organism=genus,species.", 
+                  array('%target' => $gff_target_organism), WATCHDOG_WARNING);
+                $t_organism_id = '';                
+              }
+            }  
+
+            // default the target type to be the value passed into the function, but if the GFF file
+            // specifies the target type then use that instead
+            $t_type_id = '';
+            if ($target_type) {
+              $values = array(
+                'name' => $target_type,
+                'cv_id' => array(
+                   'name' => 'sequence',
+                )
+              );
+              $options = array('statement_name' => 'sel_cvterm_nacv');
+              $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
+              if (count($type) == 1) {
+                $t_type_id = $type[0]->cvterm_id;
+              }
+              else {
+                watchdog('T_gff3_loader', "The target type does not exist in the sequence ontology: %type. ", 
+                  array('%type' => $target_type), WATCHDOG_ERROR);
+                exit;  
+              }
+            }
+            if ($gff_target_type) {
+              $values = array(
+                'name' => $gff_target_type,
+                'cv_id' => array(
+                   'name' => 'sequence',
+                )
+              );
+              $options = array('statement_name' => 'sel_cvterm_nacv');
+              $type = tripal_core_chado_select('cvterm', array('cvterm_id'), $values, $options);
+              if (count($type) == 1) {
+                $t_type_id = $type[0]->cvterm_id;
+              }
+              else {
+                watchdog('T_gff3_loader', "The target_type attribute does not exist in the sequence ontology: %type. ", 
+                  array('%type' => $gff_target_type), WATCHDOG_WARNING);
+                $t_type_id = '';
+              }
+            }                       
+            
+            // we want to add a featureloc record that uses the target feature as the srcfeature (landmark)
+            // and the landmark as the feature.
+            tripal_feature_load_gff3_featureloc($feature, $organism, $target_feature, $target_fmin, 
+              $target_fmax, $target_strand, $phase, $attr_fmin_partial, $attr_fmax_partial, $attr_residue_info, 
+              $attr_locgroup, $t_type_id, $t_organism_id, $create_target, TRUE); 
           }
           // the target attribute is not correctly formatted
           else {
@@ -1385,36 +1504,92 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
  * @ingroup gff3_loader
  */
 function tripal_feature_load_gff3_featureloc($feature, $organism, $landmark, $fmin,
-  $fmax, $strand, $phase, $is_fmin_partial, $is_fmax_partial, $residue_info, $locgroup) {
+  $fmax, $strand, $phase, $is_fmin_partial, $is_fmax_partial, $residue_info, $locgroup, 
+  $landmark_type_id = '', $landmark_organism_id = '', $create_landmark = 0, $landmark_is_target = 0) {
 
   $select = array(
-    'organism_id' => $organism->organism_id,
+    'organism_id' => $landmark_organism_id ? $landmark_organism_id : $organism->organism_id,
     'uniquename' => $landmark,
   );
-  $options = array('statement_name' => 'sel_feature_orun');  
+  $options = array('statement_name' => 'sel_feature_orun');
+  if ($landmark_type_id) {
+    $select['type_id'] = $landmark_type_id;
+    $options = array('statement_name' => 'sel_feature_orunty');
+  }  
   $results = tripal_core_chado_select('feature', array('feature_id'), $select, $options);
   
+  $srcfeature = '';
   if (count($results)==0) {
     // so we couldn't find the landmark using the uniquename. Let's try the 'name'.
-    // if we return only a singe result then we can proceed. Otherwise give an
-    // error message
+    // if we return only a single result then we can proceed. Otherwise fall back or log a warning.
     $select = array(
-      'organism_id' => $organism->organism_id,
+      'organism_id' => $landmark_organism_id ? $landmark_organism_id : $organism->organism_id,
       'name' => $landmark,
     );
-    $options = array('statement_name' => 'sel_feature_orna');
+    $options = array('statement_name' => 'sel_feature_orna');    
+    if ($landmark_type_id) {
+      $select['type_id'] = $landmark_type_id;
+      $options = array('statement_name' => 'sel_feature_ornaty');
+    } 
     $results = tripal_core_chado_select('feature', array('feature_id'), $select, $options);
     if (count($results) == 0) {
-       watchdog("T_gff3_loader", "Cannot find landmark feature: '$landmark'.", array(), WATCHDOG_WARNING);
-       return 0;
+       // if the landmark is the target feature in a matched alignment then try one more time to
+       // find it by querying any feature with the same uniquename. If we find one then use it.
+       if ($landmark_is_target) {
+         $select = array('uniquename' => $landmark);
+         $options = array('statement_name' => 'sel_feature_un');
+         $results = tripal_core_chado_select('feature', array('feature_id'), $select, $options);
+         if (count($results) == 1) {
+           $srcfeature = $results[0]; 
+         }
+       }
+
+       if (!$srcfeature) {       
+         // we couldn't find the landmark feature, so if the user has requested we create it then do so
+         // but only if we have a type id
+         if ($create_landmark and $landmark_type_id) {
+            $values = array(
+              'organism_id' => $landmark_organism_id ? $landmark_organism_id : $organism->organism_id,
+              'name' => $landmark,
+              'uniquename' => $landmark,
+              'type_id' => $landmark_type_id
+            );
+            $options = array('statement_name' => 'ins_feature_ornaunty');
+            $results = tripal_core_chado_insert('feature', $values, $options);
+            if (!$results) {
+              watchdog("T_gff3_loader", "Cannot find landmark feature: '%landmark', nor could it be inserted", 
+                array('%landmark' => $landmark), WATCHDOG_WARNING);
+              return 0;  
+            }  
+            $srcfeature = new stdClass();
+            $srcfeature->feature_id = $results->feature_id;
+         } 
+         else {
+           watchdog("T_gff3_loader", "Cannot find unique landmark feature: '%landmark'.", 
+             array('%landmark' => $landmark), WATCHDOG_WARNING);
+           return 0;
+         } 
+       }        
     } 
     elseif (count($results) > 1) {
-       watchdog("T_gff3_loader", "multiple landmarks exist with the name: '$landmark'.  Cannot resolve which one to use. Cannot add the feature location record", 
-         array(), WATCHDOG_WARNING);
+       watchdog("T_gff3_loader", "multiple landmarks exist with the name: '%landmark'.  Cannot 
+         resolve which one to use. Cannot add the feature location record", 
+         array('%landmark' => $landmark), WATCHDOG_WARNING);
        return 0;    
-    }    
+    } 
+    else {
+      $srcfeature = $results[0];
+    }   
+  }
+  elseif (count($results) > 1) {
+    watchdog("T_gff3_loader", "multiple landmarks exist with the name: '%landmark'.  Cannot 
+      resolve which one to use. Cannot add the feature location record", 
+      array('%landmark' => $landmark), WATCHDOG_WARNING);
+    return 0;  
+  }
+  else {
+    $srcfeature = $results[0];
   }
-  $srcfeature = $results[0];
 
   // TODO: create an attribute that recognizes the residue_info,locgroup, 
   //  is_fmin_partial and is_fmax_partial, right now these are
@@ -1598,14 +1773,16 @@ function tripal_feature_load_gff_fasta($fh, $interval, &$num_read, &$intv_read,
   }
   $id = NULL;
   
-  // iterate through the remainig lines of the file
+  // iterate through the remaining lines of the file
   while ($line = fgets($fh)) {
     
     $line_num++;
-    $num_read += drupal_strlen($line);   
-    $intv_read += $num_read; 
+    $size = drupal_strlen($line);   
+    $num_read += $size;
+    $intv_read += $size; 
+    
     $line = trim($line);      
-
+    
     // update the job status every 1% features
     if ($job and $intv_read >= $interval) {
       $intv_read = 0;

+ 6 - 2
tripal_feature/tripal_feature.module

@@ -2638,8 +2638,7 @@ function tripal_feature_job_describe_args($callback, $args) {
     $new_args['Sequence Type'] = $args[2];
     $new_args['Name Match Type'] = $args[14];
     $new_args['Name RE'] = $args[4];
-    $new_args['Unique Name RE'] = $args[5];
-
+    $new_args['Unique Name RE'] = $args[5];   
 
     // add in the relationship arguments
     $new_args['Relationship Type'] = $args[8];
@@ -2694,6 +2693,11 @@ function tripal_feature_job_describe_args($callback, $args) {
     $new_args['Import all and update'] = ($args[4] == 1) ? "Yes" : "No";
     $new_args['Import all and replace'] = ($args[5] == 1) ? "Yes" : "No";
     $new_args['Delete features'] = ($args[6] == 1) ? "Yes" : "No";
+    $target_organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $args[8]));
+    $new_args['Target organism'] = $target_organism[0]->genus . " " . $target_organism[0]->species;
+    $new_args['Target type'] = $args[9];
+    $new_args['Create target'] = ($args[10] == 1) ? "Yes" : "No";
+    
     
   }
   if ($callback == 'tripal_feature_sync_features') {