Stephen Ficklin 9 years ago
parent
commit
4c3219b361

+ 14 - 2
tripal_analysis/includes/tripal_analysis.chado_node.inc

@@ -38,6 +38,18 @@ function tripal_analysis_node_info() {
   return $nodes;
   return $nodes;
 }
 }
 
 
+/**
+ * Implements hook_chado_node_sync_form().
+ */
+function chado_analysis_chado_node_sync_form($form, &$form_state) {
+  if (array_key_exists('sync', $form) and array_key_exists('ids', $form['sync'])) {
+    $form['sync']['ids']['#prefix'] .= t('Please note that if analyses exist
+      that were created by other Tripal modules (e.g. Tripal Analysis Blast)
+      then you should sync those using that module\'s sync interface. Otherwise
+      they may not have all of the proper functionality.');
+  }
+  return $form;
+}
 /**
 /**
  * Implements hook_form().
  * Implements hook_form().
  * When editing or creating a new node of type 'chado_analysis' we need
  * When editing or creating a new node of type 'chado_analysis' we need
@@ -415,7 +427,7 @@ function chado_analysis_insert($node) {
     $year  = $time['year'];
     $year  = $time['year'];
     $timestamp = $month . '/' . $day . '/' . $year;
     $timestamp = $month . '/' . $day . '/' . $year;
 
 
-    // insert and then get the newly inserted analysis record
+    // Insert and then get the newly inserted analysis record
     $values = array(
     $values = array(
       'name'           => $node->analysisname,
       'name'           => $node->analysisname,
       'description'    => $node->description,
       'description'    => $node->description,
@@ -793,5 +805,5 @@ function chado_analysis_chado_node_default_title_format() {
  * Designates a default URL format for analysis nodes.
  * Designates a default URL format for analysis nodes.
  */
  */
 function chado_analysis_chado_node_default_url_format() {
 function chado_analysis_chado_node_default_url_format() {
-  return '/analysis/[analysis.program]/[analysis.programversion]/[analysis.sourcename]';
+  return '/analysis/[analysis.analysis_id]';
 }
 }

+ 253 - 203
tripal_core/api/tripal_core.chado_nodes.api.inc

@@ -352,7 +352,7 @@ function chado_add_node_form_subtable_ajax_update($form, &$form_state) {
     $module_name = 'tripal_example';
     $module_name = 'tripal_example';
 
 
     // the base specified in hook_node_info
     // the base specified in hook_node_info
-    $linking_table = 'chado_example';
+    $node_type = 'chado_example';
 
 
     // This menu item will be a tab on the admin/tripal/chado/tripal_example page
     // This menu item will be a tab on the admin/tripal/chado/tripal_example page
     // that is not selected by default
     // that is not selected by default
@@ -360,7 +360,7 @@ function chado_add_node_form_subtable_ajax_update($form, &$form_state) {
       'title' => ' Sync',
       'title' => ' Sync',
       'description' => 'Sync examples from Chado with Drupal',
       'description' => 'Sync examples from Chado with Drupal',
       'page callback' => 'drupal_get_form',
       'page callback' => 'drupal_get_form',
-      'page arguments' => array('chado_node_sync_form', $module_name, $linking_table),
+      'page arguments' => array('chado_node_sync_form', $module_name, $node_type),
       'access arguments' => array('administer tripal examples'),
       'access arguments' => array('administer tripal examples'),
       'type' => MENU_LOCAL_TASK,
       'type' => MENU_LOCAL_TASK,
       'weight' => 0
       'weight' => 0
@@ -380,8 +380,12 @@ function chado_add_node_form_subtable_ajax_update($form, &$form_state) {
 
 
         // this is what differs from the regular Drupal-documented hook_node_info()
         // this is what differs from the regular Drupal-documented hook_node_info()
         'chado_node_api' => array(
         'chado_node_api' => array(
-          'base_table' => 'example',            // the name of the chado base table
-          'hook_prefix' => 'chado_example',     // usually the name of the node type
+          'base_table' => 'example',            // The name of the chado base table
+          'hook_prefix' => 'chado_example',     // Usually the name of the node type
+          'linking_table' => 'chado_example',   // Specifies the linking table used
+                                                // to map records to Drupal nodes.
+                                                // if 'linking_table' is not specified
+                                                // it defaults to the node_type name.
           'record_type_title' => array(
           'record_type_title' => array(
             'singular' => t('Example'),         // Singular human-readable title
             'singular' => t('Example'),         // Singular human-readable title
             'plural' => t('Examples')           // Plural human-readable title
             'plural' => t('Examples')           // Plural human-readable title
@@ -414,9 +418,18 @@ function chado_node_sync_form($form, &$form_state) {
 
 
   if (isset($form_state['build_info']['args'][0])) {
   if (isset($form_state['build_info']['args'][0])) {
     $module = $form_state['build_info']['args'][0];
     $module = $form_state['build_info']['args'][0];
-    $linking_table = $form_state['build_info']['args'][1];
+    $node_type = $form_state['build_info']['args'][1];
     $node_info = call_user_func($module . '_node_info');
     $node_info = call_user_func($module . '_node_info');
-    $args = $node_info[$linking_table]['chado_node_api'];
+
+    // If a linking table is set in the node_info array then use that,
+    // otherwise use the node_type as the linking table.
+    if (array_key_exists('linking_table', $node_info[$node_type]['chado_node_api'])) {
+      $linking_table = $node_info[$node_type]['chado_node_api']['linking_table'];
+    }
+    else {
+      $linking_table = 'chado_' . $node_info[$node_type]['chado_node_api']['base_table'];
+    }
+    $args = $node_info[$node_type]['chado_node_api'];
     $form_state['chado_node_api'] = $args;
     $form_state['chado_node_api'] = $args;
   }
   }
 
 
@@ -425,6 +438,11 @@ function chado_node_sync_form($form, &$form_state) {
     '#value' => $linking_table
     '#value' => $linking_table
   );
   );
 
 
+  $form['node_type'] = array(
+    '#type' => 'hidden',
+    '#value' => $node_type
+  );
+
   // define the fieldsets
   // define the fieldsets
   $form['sync'] = array(
   $form['sync'] = array(
     '#type' => 'fieldset',
     '#type' => 'fieldset',
@@ -498,20 +516,67 @@ function chado_node_sync_form($form, &$form_state) {
     if (array_key_exists('primary key', $table_info) and count($table_info['primary key']) == 1) {
     if (array_key_exists('primary key', $table_info) and count($table_info['primary key']) == 1) {
       $pkey = $table_info['primary key'][0];
       $pkey = $table_info['primary key'][0];
       $columns  = $args['sync_filters']['checkboxes'];
       $columns  = $args['sync_filters']['checkboxes'];
-      $select_cols = implode("|| ' ' ||", $columns);
-
-      // get non-synced records
-      $sql = "
-        SELECT BT.$pkey as id, $select_cols as value
-        FROM {" . $base_table . "} BT
-          LEFT JOIN public.$linking_table LT ON LT.$pkey = BT.$pkey
-        WHERE LT.$pkey IS NULL
-        ORDER BY value ASC
-      ";
-      $results = chado_query($sql);
+      $select_cols = '';
+      foreach ($columns as $column) {
+        $select_cols .= $base_table . '.' . $column . "|| ' ' ||";
+      }
+      // Remove trailing || ' ' ||
+      $select_cols = substr($select_cols, 0, -9);
+      $base_table_id  = $base_table . '_id';
+
+      $select = array($base_table . '.' . $pkey, $select_cols . ' as value');
+      $joins = array();
+      $where_clauses = array();
+      $where_args = array();
+
+      // Allow module to update the query.
+      $hook_query_alter = $node_type . '_chado_node_sync_select_query';
+      if (function_exists($hook_query_alter)) {
+        $update = call_user_func($hook_query_alter, array(
+          'select' => $select,
+          'joins' => $joins,
+          'where_clauses' => $where_clauses,
+          'where_args' => $where_args,
+        ));
+        // Now add in any new changes
+        if ($update and is_array($update)) {
+          $select = $update['select'];
+          $joins = $update['joins'];
+          $where_clauses = $update['where_clauses'];
+          $where_args = $update['where_args'];
+        }
+      }
+
+      // Build Query, we do a left join on the chado_xxxx table in the Drupal schema
+      // so that if no criteria are specified we only get those items that have not
+      // yet been synced.
+      $query = "SELECT " . implode(', ', $select) . ' ' .
+               'FROM {' . $base_table . '} ' . $base_table . ' ' . implode(' ', $joins) . ' '.
+               "  LEFT JOIN public.$linking_table CT ON CT.$base_table_id = $base_table.$base_table_id " .
+               "WHERE CT.$base_table_id IS NULL";
+
+      // extend the where clause if needed
+      $where = '';
+      $sql_args = array();
+      foreach ($where_clauses as $category => $items) {
+        $where .= ' AND (';
+        foreach ($items as $item) {
+          $where .= $item . ' OR ';
+        }
+        $where = substr($where, 0, -4); // remove the trailing 'OR'
+        $where .= ') ';
+        $sql_args = array_merge($sql_args, $where_args[$category]);
+      }
+
+      if ($where) {
+        $query .= $where;
+      }
+      $query .= " ORDER BY $base_table." . implode(", $base_table.", $columns);
+      $results = chado_query($query, $sql_args);
+
       $values = array();
       $values = array();
       foreach ($results as $result) {
       foreach ($results as $result) {
-        $values[$result->id] = $result->value;
+        $values[$result->$pkey] = $result->value;
       }
       }
       if (count($values) > 0) {
       if (count($values) > 0) {
         $form['sync']['ids'] = array(
         $form['sync']['ids'] = array(
@@ -520,7 +585,7 @@ function chado_node_sync_form($form, &$form_state) {
           '#options'       => $values,
           '#options'       => $values,
           '#default_value' => (isset($form_state['values']['ids'])) ? $form_state['values']['ids'] : array(),
           '#default_value' => (isset($form_state['values']['ids'])) ? $form_state['values']['ids'] : array(),
           '#suffix'        => '</div><br>',
           '#suffix'        => '</div><br>',
-          '#prefix'       => t("The following  %title_plural have not been synced. Check those to be synced or leave all unchecked to sync them all.",
+          '#prefix'        => t("The following  %title_plural have not been synced. Check those to be synced or leave all unchecked to sync them all.",
               array(
               array(
                 '%title_singular' => strtolower($args['record_type_title']['singular']),
                 '%title_singular' => strtolower($args['record_type_title']['singular']),
                 '%title_plural'   => strtolower($args['record_type_title']['plural'])
                 '%title_plural'   => strtolower($args['record_type_title']['plural'])
@@ -619,6 +684,7 @@ function chado_node_sync_form_submit($form, $form_state) {
     $module = $form_state['chado_node_api']['hook_prefix'];
     $module = $form_state['chado_node_api']['hook_prefix'];
     $base_table = $form_state['chado_node_api']['base_table'];
     $base_table = $form_state['chado_node_api']['base_table'];
     $linking_table = $form_state['values']['linking_table'];
     $linking_table = $form_state['values']['linking_table'];
+    $node_type = $form_state['values']['node_type'];
 
 
     // Allow each module to hijack the submit if needed
     // Allow each module to hijack the submit if needed
     $hook_form_hijack_submit = $args['hook_prefix'] . '_chado_node_sync_form_submit';
     $hook_form_hijack_submit = $args['hook_prefix'] . '_chado_node_sync_form_submit';
@@ -665,7 +731,8 @@ function chado_node_sync_form_submit($form, $form_state) {
       'organism_id' => $organism_id,
       'organism_id' => $organism_id,
       'types' => $types,
       'types' => $types,
       'ids' => $ids,
       'ids' => $ids,
-      'inking_table' => $linking_table
+      'linking_table' => $linking_table,
+      'node_type' => $node_type
     );
     );
 
 
     $title = "Sync " . $args['record_type_title']['plural'];
     $title = "Sync " . $args['record_type_title']['plural'];
@@ -674,7 +741,9 @@ function chado_node_sync_form_submit($form, $form_state) {
   if (preg_match('/^Clean up orphaned/', $form_state['values']['op'])) {
   if (preg_match('/^Clean up orphaned/', $form_state['values']['op'])) {
     $module = $form_state['chado_node_api']['hook_prefix'];
     $module = $form_state['chado_node_api']['hook_prefix'];
     $base_table = $form_state['chado_node_api']['base_table'];
     $base_table = $form_state['chado_node_api']['base_table'];
-    $job_args = array($base_table, $form_state['values']['cleanup_batch_size']);
+    $linking_table = $form_state['values']['linking_table'];
+    $node_type = $form_state['values']['node_type'];
+    $job_args = array($base_table, $form_state['values']['cleanup_batch_size'], $linking_table, $node_type);
     variable_set('chado_node_api_cleanup_batch_size', $form_state['values']['cleanup_batch_size']);
     variable_set('chado_node_api_cleanup_batch_size', $form_state['values']['cleanup_batch_size']);
     tripal_add_job($form_state['values']['op'], $module, 'chado_cleanup_orphaned_nodes', $job_args, $user->uid);
     tripal_add_job($form_state['values']['op'], $module, 'chado_cleanup_orphaned_nodes', $job_args, $user->uid);
   }
   }
@@ -709,8 +778,9 @@ function chado_node_sync_form_submit($form, $form_state) {
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
-function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id = FALSE,
-    $types = array(), $ids = array(), $linking_table = FALSE, $job_id = NULL) {
+function chado_node_sync_records($base_table, $max_sync = FALSE,
+    $organism_id = FALSE, $types = array(), $ids = array(),
+    $linking_table = FALSE, $node_type = FALSE, $job_id = NULL) {
 
 
   global $user;
   global $user;
   $base_table_id = $base_table . '_id';
   $base_table_id = $base_table . '_id';
@@ -762,7 +832,7 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
   }
   }
 
 
   // Allow module to add to query
   // Allow module to add to query
-  $hook_query_alter = $linking_table . '_chado_node_sync_select_query';
+  $hook_query_alter = $node_type . '_chado_node_sync_select_query';
   if (function_exists($hook_query_alter)) {
   if (function_exists($hook_query_alter)) {
     $update = call_user_func($hook_query_alter, array(
     $update = call_user_func($hook_query_alter, array(
       'select' => $select,
       'select' => $select,
@@ -862,7 +932,7 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
 
 
         // Create generic new node
         // Create generic new node
         $new_node = new stdClass();
         $new_node = new stdClass();
-        $new_node->type = $linking_table;
+        $new_node->type = $node_type;
         $new_node->uid = $user->uid;
         $new_node->uid = $user->uid;
         $new_node->{$base_table_id} = $record->{$base_table_id};
         $new_node->{$base_table_id} = $record->{$base_table_id};
         $new_node->$base_table = $record;
         $new_node->$base_table = $record;
@@ -870,7 +940,7 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
 
 
         // TODO: should we get rid of this hook and use hook_node_presave() instead?
         // TODO: should we get rid of this hook and use hook_node_presave() instead?
         // allow base module to set additional fields as needed
         // allow base module to set additional fields as needed
-        $hook_create_new_node = $linking_table . '_chado_node_sync_create_new_node';
+        $hook_create_new_node = $node_type . '_chado_node_sync_create_new_node';
         if (function_exists($hook_create_new_node)) {
         if (function_exists($hook_create_new_node)) {
           $new_node = call_user_func($hook_create_new_node, $new_node, $record);
           $new_node = call_user_func($hook_create_new_node, $new_node, $record);
         }
         }
@@ -917,28 +987,28 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
-function chado_cleanup_orphaned_nodes($table, $nentries = 25000, $job_id = NULL) {
+function chado_cleanup_orphaned_nodes($table, $nentries = 25000, $linking_table, $node_type, $job_id = NULL) {
   $count = 0;
   $count = 0;
 
 
   // Find the total number of entries in the table.
   // Find the total number of entries in the table.
-  $dsql = "SELECT COUNT(*) FROM {node} WHERE type = 'chado_" . $table . "'";
-  $clsql= "SELECT COUNT(*) FROM {chado_" . $table . "}";
+  $dsql = "SELECT COUNT(*) FROM {node} WHERE type = :node_type";
+  $clsql= "SELECT COUNT(*) FROM {$linking_table}";
 
 
   // Find the number nodes of type chado_$table and find the number of entries
   // Find the number nodes of type chado_$table and find the number of entries
   // in chado_$table; keep the larger of the two numbers.
   // in chado_$table; keep the larger of the two numbers.
-  $ndat = db_query($dsql);
+  $ndat = db_query($dsql, array(':node_type' => $node_type));
   $temp = $ndat->fetchObject();
   $temp = $ndat->fetchObject();
-  $count =  $temp->count;
+  $count = $temp->count;
   $cdat = db_query($clsql);
   $cdat = db_query($clsql);
   $temp = $cdat->fetchObject();
   $temp = $cdat->fetchObject();
-  if(count < $temp->count) {
-    $count =  $temp->count;
+  if (count < $temp->count) {
+    $count = $temp->count;
   }
   }
 
 
   $m = ceil($count / $nentries);
   $m = ceil($count / $nentries);
-  for($i = 0; $i < $m; $i++) {
-    $offset = ($nentries*$i)+1;
-    chado_cleanup_orphaned_nodes_part($table, $job_id, $nentries, $offset);
+  for ($i = 0; $i < $m; $i++) {
+    $offset = ($nentries * $i);
+    chado_cleanup_orphaned_nodes_part($table, $job_id, $nentries, $offset, $linking_table, $node_type);
   }
   }
 
 
   return '';
   return '';
@@ -957,161 +1027,129 @@ function chado_cleanup_orphaned_nodes($table, $nentries = 25000, $job_id = NULL)
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
-function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries, $offset) {
+function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries, $offset, $linking_table, $node_type) {
   $count = 0;
   $count = 0;
 
 
-  // Change this variable to TRUE to print debugging values for memory leaks.
-  $debug_memory_leak = FALSE;
-
-  // build the SQL statments needed to check if nodes point to valid analyses
-  $dsql = "SELECT * FROM {node} WHERE type = 'chado_" . $table . "' ORDER BY nid LIMIT $nentries OFFSET $offset";
-  $nsql = "SELECT * FROM {node} WHERE nid = :nid";
-  $csql = "SELECT * FROM {chado_" . $table . "} WHERE nid = :nid ";
-  $clsql= "SELECT * FROM {chado_" . $table . "} ORDER BY nid LIMIT $nentries OFFSET $offset";
-  $lsql = "SELECT * FROM {" . $table . "} where " . $table . "_id = :" . $table . "_id ";
-
-  // load the chado_$table into an array
-  print "Getting chado_$table\n";
+  // Retrieve all of the entries in the linker table for a given node type
+  // and place into an array.
+  print "Verifying $linking_table records...\n";
   $cnodes = array();
   $cnodes = array();
-  $res = db_query($clsql);
+  $clsql= "
+    SELECT *
+    FROM {" . $linking_table . "} LT
+      INNER JOIN {node} N ON N.nid = LT.nid
+    WHERE N.type = :node_type
+    ORDER BY LT.nid LIMIT $nentries OFFSET $offset";
+  $res = db_query($clsql, array(':node_type' => $node_type));
   foreach ($res as $node) {
   foreach ($res as $node) {
     $cnodes[$count] = $node;
     $cnodes[$count] = $node;
     $count++;
     $count++;
   }
   }
 
 
-  // Free $res.
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $res = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing res: " . ($mu - memory_get_usage()) ." bytes\n";
-  }
-
-  $interval = intval($count * 0.01);
-  if ($interval < 1) {
-    $interval = 1;
-  }
-
-  // iterate through all of the chado_$table entries and remove those
-  // that don't have a node or don't have a $table record in chado.libary
-  print "Verifying all chado_$table Entries\n";
+  // Iterate through all of the $linking_table entries and remove those
+  // that don't have a node or don't have a $table record.
   $deleted = 0;
   $deleted = 0;
-  foreach ($cnodes as $nid) {
-
-    // update the job status every 1% analyses
-    if ($job_id and $i % $interval == 0) {
-      tripal_set_job_progress($job_id, intval(($i / $count) * 100));
+  if ($count > 0) {
+    $i = 0;
+    $interval = intval($count * 0.01);
+    if ($interval < 1) {
+      $interval = 1;
     }
     }
+    foreach ($cnodes as $nid) {
+      // Update the job status after every 1% of the records.
+      if ($job_id and $i % $interval == 0) {
+        $percent = sprintf("%.2f", ($i / $count) * 100);
+        tripal_set_job_progress($job_id, intval($percent));
+        print "Percent complete: $percent%. Memory: " . number_format(memory_get_usage()) . " bytes.\r";
+      }
 
 
-    // see if the node exits, if not remove the entry from the chado_$table table
-    $results = db_query($nsql, array(':nid' => $nid->nid));
-    $node = $results->fetchObject();
-    if (!$node) {
-      $deleted++;
-      db_query("DELETE FROM {chado_" . $table . "} WHERE nid = :nid", array(':nid' => $nid->nid));
-      $message = "chado_$table missing node.... DELETING: $nid->nid";
-      watchdog('tripal_core', $message, array(), WATCHDOG_WARNING);
-    }
+      // See if the node exists; if not, remove the entry from the linking table.
+      $nsql = "SELECT * FROM {node} WHERE nid = :nid";
+      $results = db_query($nsql, array(':nid' => $nid->nid));
+      $node = $results->fetchObject();
+      if (!$node) {
+        $deleted++;
+        db_query("DELETE FROM {" . $linking_table . "} WHERE nid = :nid", array(':nid' => $nid->nid));
+        //print "$linking_table missing node.... DELETING: $nid->nid\n";
+      }
 
 
-    // see if the record in chado exist, if not remove the entry from the chado_$table
-    $table_id = $table . "_id";
-    $results = chado_query($lsql, array(":" . $table . "_id" => $nid->$table_id));
-    $record = $results->fetchObject();
-    if (!$record) {
-      $deleted++;
-      $sql = "DELETE FROM {chado_" . $table . "} WHERE " . $table . "_id = :" . $table . "_id";
-      db_query($sql, array(":" . $table . "_id" => $nid->$table_id));
-      $message = "chado_$table missing $table.... DELETING entry.";
-      watchdog('tripal_core', $message, array(), WATCHDOG_NOTICE);
+      // Check whether the record exists in Chado; if not, remove the entry from $linking_table.
+      $table_id = $table . "_id";
+      $lsql = "SELECT * FROM {" . $table . "} where " . $table . "_id = :" . $table . "_id";
+      $results = chado_query($lsql, array(":" . $table . "_id" => $nid->$table_id));
+      $record = $results->fetchObject();
+      if (!$record) {
+        $deleted++;
+        $sql = "DELETE FROM {" . $linking_table . "} WHERE " . $table . "_id = :" . $table . "_id";
+        db_query($sql, array(":" . $table . "_id" => $nid->$table_id));
+        //print "$linking_table missing $table.... DELETING entry.\n";
+      }
+      $i++;
     }
     }
-    $i++;
-  }
-
-  // Freeing up various resources: $cnodes, $node, $record, and $results.
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $cnodes = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing cnodes: " . ($mu - memory_get_usage()) ." bytes\n";
-  }
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $results = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing results: " . ($mu - memory_get_usage()) ." bytes\n";
-  }
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $node = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing node: " . ($mu - memory_get_usage()) ." bytes\n";
+    $percent = sprintf("%.2f", ($i / $count) * 100);
+    tripal_set_job_progress($job_id, intval($percent));
+    print "Percent complete: $percent%. Memory: " . number_format(memory_get_usage()) . " bytes.\r";
   }
   }
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $record = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing record: " . ($mu - memory_get_usage()) ." bytes\n";
-  }
-
-  print "\t$deleted chado_$table entries missing either a node or chado entry.\n";
-
-  // iterate through all of the nodes and delete those that don't
-  // have a corresponding entry in chado_$table
-  $deleted = 0;
-  // load into nodes array
-  print "Getting nodes\n";
+  print "\nDeleted $deleted record(s) from $linking_table missing either a node or chado entry.\n";
+
+  // Build the SQL statements needed to check if nodes point to valid record.
+  print "Verifying nodes...\n";
+  $dsql = "
+    SELECT *
+    FROM {node}
+    WHERE type = :node_type
+    ORDER BY nid
+    LIMIT $nentries OFFSET $offset
+  ";
+
+  $dsql_args = array(':node_type' => $node_type);
   $nodes = array();
   $nodes = array();
-  $res = db_query($dsql);
+  $res = db_query($dsql, $dsql_args);
+  $count = 0;
   foreach ($res as $node) {
   foreach ($res as $node) {
     $nodes[$count] = $node;
     $nodes[$count] = $node;
     $count++;
     $count++;
   }
   }
 
 
-  // Free $res.
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $res = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing res:\n" . ($mu - memory_get_usage()) ." bytes\n";
-  }
-
-  foreach ($nodes as $node) {
-
-    // update the job status every 1% libraries
-    if ($job_id and $i % $interval == 0) {
-      tripal_set_job_progress($job_id, intval(($i / $count) * 100));
+  // Iterate through all of the nodes and delete those that don't
+  // have a corresponding entry in the linking table.
+  $deleted = 0;
+  if ($count > 0) {
+    $i = 0;
+    $interval = intval($count * 0.01);
+    if ($interval < 1) {
+      $interval = 1;
     }
     }
-
-    // check to see if the node has a corresponding entry
-    // in the chado_$table table. If not then delete the node.
-    $results = db_query($csql, array(":nid" => $node->nid));
-    $link = $results->fetchObject();
-    if (!$link) {
-      if (node_access('delete', $node)) {
-        $deleted++;
-        $message = "Node missing in chado_$table table.... DELETING node $node->nid";
-        watchdog("tripal_core", $message, array(), WATCHDOG_NOTICE);
-        node_delete($node->nid);
+    foreach ($nodes as $node) {
+      // update the job status every 1%
+      if ($job_id and $i % $interval == 0) {
+        $percent = sprintf("%.2f", ($i / $count) * 100);
+        tripal_set_job_progress($job_id, intval($percent));
+        print "Percent complete: $percent%. Memory: " . number_format(memory_get_usage()) . " bytes.\r";
       }
       }
-      else {
-        $message = "Node missing in chado_$table table.... but cannot delete due to improper permissions (node $node->nid)";
-        watchdog("tripal_core", $message, array(), WATCHDOG_WARNING);
+
+      // check to see if the node has a corresponding entry
+      // in the $linking_table table. If not then delete the node.
+      $csql = "SELECT * FROM {" . $linking_table . "} WHERE nid = :nid ";
+      $results = db_query($csql, array(':nid' => $node->nid));
+      $link = $results->fetchObject();
+      if (!$link) {
+        if (node_access('delete', $node)) {
+          $deleted++;
+          //print "Node missing in $linking_table table.... DELETING node $node->nid\n";
+          node_delete($node->nid);
+        }
+        else {
+          print "\nNode missing in $linking_table table.... but cannot delete due to improper permissions (node $node->nid)\n";
+        }
       }
       }
+      $i++;
     }
     }
-    $i++;
-  }
-  // // Freeing up various resources: $results, $link and $nodes
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $results = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing results: " . ($mu - memory_get_usage()) . " bytes\n";
-  }
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $link = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing link: " . ($mu - memory_get_usage()) . " bytes\n";
+    $percent = sprintf("%.2f", ($i / $count) * 100);
+    tripal_set_job_progress($job_id, intval($percent));
+    print "Percent complete: $percent%. Memory: " . number_format(memory_get_usage()) . " bytes.\r";
+    print "\nDeleted $deleted node(s) that did not have corresponding $linking_table entries.\n";
   }
   }
-  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
-  $nodes = NULL;
-  if ($debug_memory_leak) {
-    print "\tFreeing nodes: " . ($mu - memory_get_usage()) ." bytes\n";
-  }
-
-  print "\t$deleted nodes did not have corresponding chado_$table entries.\n";
 
 
   return '';
   return '';
 }
 }
@@ -1119,8 +1157,8 @@ function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries, $o
 /**
 /**
  * Create New Node
  * Create New Node
  *
  *
- * Note: For your own module, replace hook in the function name with the machine-name of
- * your chado node type (ie: chado_feature).
+ * Note: For your own module, replace hook in the function name with the
+ * machine-name of your chado node type (ie: chado_feature).
  *
  *
  * @param $new_node:
  * @param $new_node:
  *   a basic new node object
  *   a basic new node object
@@ -1128,51 +1166,53 @@ function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries, $o
  *   the record object from chado specifying the biological data for this node
  *   the record object from chado specifying the biological data for this node
  *
  *
  * @return
  * @return
- *   A node object containing all the fields necessary to create a new node during sync
+ *   A node object containing all the fields necessary to create a new node
+ *   during sync
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
 function hook_chado_node_sync_create_new_node($new_node, $record) {
 function hook_chado_node_sync_create_new_node($new_node, $record) {
 
 
-  // Add relevant chado details to the new node object
-  // This really only needs to be the fields from the node used during node creation
-  // including values used to generate the title, etc.
-  // All additional chado data will be added via nodetype_load when the node is later used
+  // Add relevant chado details to the new node object. This really only
+  // needs to be the fields from the node used during node creation
+  // including values used to generate the title, etc. All additional chado
+  // data will be added via nodetype_load when the node is later used
   $new_node->uniquename = $record->uniquename;
   $new_node->uniquename = $record->uniquename;
 
 
   return $new_node;
   return $new_node;
 }
 }
 
 
 /**
 /**
- * Alter the sync form (optional)
+ * Alter the Chado node sync form.
  *
  *
- * This might be necessary if you need additional filtering options for choosing which
- * chado records to sync or even if you just want to further customize the help text
- * provided by the form.
+ * This might be necessary if you need additional filtering options for
+ * choosing which chado records to sync or even if you just want to further
+ * customize the help text provided by the form.
  *
  *
- * Note: For your own module, replace hook in the function name with the machine-name of
- * your chado node type (ie: chado_feature).
+ * Note: For your own module, replace hook in the function name with the
+ * machine-name of your chado node type (ie: chado_feature).
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
 function hook_chado_node_sync_form($form, &$form_state) {
 function hook_chado_node_sync_form($form, &$form_state) {
 
 
-  // Change or add to the form array as needed
-  // Any changes should be made in accordance with the Drupal Form API
+  // Change or add to the form array as needed.
+  // Any changes should be made in accordance with the Drupal Form API.
 
 
   return $form;
   return $form;
 }
 }
 
 
 /**
 /**
- * Bypass chado node api sync form submit (optional). Allows you to use this function
- * as your own submit.
+ * Bypass chado node api sync form submit.
+ *
+ * Allows you to use this function as your own submit.
  *
  *
- * This might be necessary if you want to add additional arguements to the tripal job or
- * to call your own sync'ing function if the generic chado_node_sync_records() is not
- * sufficient.
+ * This might be necessary if you want to add additional arguments to the
+ * tripal job or to call your own sync'ing function if the generic
+ * chado_node_sync_records() is not sufficient.
  *
  *
- * Note: For your own module, replace hook in the function name with the machine-name of
- * your chado node type (ie: chado_feature).
+ * Note: For your own module, replace hook in the function name with the
+ * machine-name of your chado node type (ie: chado_feature).
  *
  *
  * @ingroup tripal_chado_node_api
  * @ingroup tripal_chado_node_api
  */
  */
@@ -1181,19 +1221,28 @@ function hook_chado_node_sync_form_submit ($form, $form_state) {
   global $user;
   global $user;
 
 
   $job_args = array(
   $job_args = array(
-    $base_table,      // the base chado table (ie: feature)
-    $max_sync,        // the maximum number of records to sync or FALSE for sync all that match
-    $organism_id,     // the organism_id to restrict records to or FALSE if not to restrict by organism_id
-    $types            // A string with the cvterm.name of the types to restrict to separated by |||
+    // The base chado table (ie: feature).
+    $base_table,
+    // The maximum number of records to sync or FALSE for sync all that match.
+    $max_sync,
+    // The organism_id to restrict records to or FALSE if not to restrict by organism_id.
+    $organism_id,
+    // A string with the cvterm.name of the types to restrict to separated by |||
+    $types
   );
   );
 
 
   // You should register a tripal job
   // You should register a tripal job
   tripal_add_job(
   tripal_add_job(
-    $title,                                   // the title of the job -be descriptive
-    $module,                                  // the name of your module
-    'chado_node_sync_records',    // the chado node api sync function
-    $job_args,                                // an array with the arguments to pass to the above function
-    $user->uid                                // the user who submitted the job
+    // The title of the job - be descriptive.
+    $title,
+    // The name of your module.
+    $module,
+    // The chado node api sync function.
+    'chado_node_sync_records',
+    // An array with the arguments to pass to the above function.
+    $job_args,
+    // The user who submitted the job.
+    $user->uid
   );
   );
 
 
 }
 }
@@ -1202,13 +1251,14 @@ function hook_chado_node_sync_form_submit ($form, $form_state) {
 /**
 /**
  * Alter the query that retrieves records to be sync'd (optional)
  * Alter the query that retrieves records to be sync'd (optional)
  *
  *
- * This might be necessary if you need fields from other chado tables to create your node
- * or if your chado node type only supports a subset of a given table (ie: a germplasm node
- * type might only support node creation for cerain types of stock records in which case
- * you would need to filter the results to only those types).
+ * This might be necessary if you need fields from other chado tables to
+ * create your node or if your chado node type only supports a subset of a
+ * given table (ie: a germplasm node type might only support node creation for
+ * certain types of stock records in which case you would need to filter the
+ * results to only those types).
  *
  *
- * Note: For your own module, replace hook in the function name with the machine-name of
- * your chado node type (ie: chado_feature).
+ * Note: For your own module, replace hook in the function name with the
+ * machine-name of your chado node type (ie: chado_feature).
  *
  *
  * @param $query
  * @param $query
  *   An array containing the following:
  *   An array containing the following:

+ 5 - 10
tripal_core/api/tripal_core.chado_nodes.title_and_path.inc

@@ -546,14 +546,9 @@ function chado_get_node_url($node) {
           $url = str_replace($token, $value, $url);
           $url = str_replace($token, $value, $url);
         }
         }
         else {
         else {
-          tripal_report_error(
-            'chado_node_api',
-            TRIPAL_ERROR,
-            'Unable to replace %token. The value in the node should be a string but is instead: %value',
-            array(
-              '%token' => $token,
-              '%value' => print_r($value, TRUE)
-            )
+          tripal_report_error('chado_node_api',TRIPAL_ERROR,
+            'Unable to replace %token. The value in the node should be a string but is instead: \'%value\'',
+            array('%token' => $token, '%value' => print_r($value, TRUE))
           );
           );
         }
         }
       }
       }
@@ -1332,7 +1327,7 @@ function chado_get_token_value($token_info, $node) {
       else {
       else {
         tripal_report_error('chado_node_api', TRIPAL_WARNING,
         tripal_report_error('chado_node_api', TRIPAL_WARNING,
           'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
           'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
-          'to access %index for the following: %var.',
+          'to access \'%index\' for the following: \'%var\'.',
           array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
           array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
         );
         );
         return '';
         return '';
@@ -1346,7 +1341,7 @@ function chado_get_token_value($token_info, $node) {
     else {
     else {
       tripal_report_error('chado_node_api', TRIPAL_WARNING,
       tripal_report_error('chado_node_api', TRIPAL_WARNING,
         'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
         'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
-        'to access %index for the following: %var.',
+        'to access \'%index\' for the following: \'%var\'.',
         array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
         array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
       );
       );
       return '';
       return '';

+ 11 - 8
tripal_feature/includes/tripal_feature.gff_loader.inc

@@ -1641,8 +1641,6 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
       'organism_id' => $organism->organism_id,
       'organism_id' => $organism->organism_id,
       'name' => $name,
       'name' => $name,
       'uniquename' => $uniquename,
       'uniquename' => $uniquename,
-//      'residues' => $residues,
-//      'seqlen' => drupal_strlen($residues),
       'md5checksum' => md5($residues),
       'md5checksum' => md5($residues),
       'type_id' => $cvterm->cvterm_id,
       'type_id' => $cvterm->cvterm_id,
       'is_analysis' => $is_analysis,
       'is_analysis' => $is_analysis,
@@ -1657,8 +1655,6 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
   elseif (!$add_only) {
   elseif (!$add_only) {
     $values = array(
     $values = array(
       'name' => $name,
       'name' => $name,
-//      'residues' => $residues,
-//      'seqlen' => drupal_strlen($residues),
       'md5checksum' => md5($residues),
       'md5checksum' => md5($residues),
       'is_analysis' => $is_analysis,
       'is_analysis' => $is_analysis,
       'is_obsolete' => $is_obsolete,
       'is_obsolete' => $is_obsolete,
@@ -2040,14 +2036,18 @@ function tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read,
         else {
         else {
           // if we have a feature then add the residues
           // if we have a feature then add the residues
           $feature = $result[0];
           $feature = $result[0];
-          $values = array('residues' => $residues);
+          $values = array(
+            'residues' => $residues,
+            'seqlen' => strlen($residues)
+          );
           $match = array('feature_id' => $feature->feature_id);
           $match = array('feature_id' => $feature->feature_id);
           chado_update_record('feature', $match, $values);
           chado_update_record('feature', $match, $values);
         }
         }
       }
       }
 
 
-      // get the feature ID for this ID from the tripal_gff_temp table
-      $id = preg_replace('/^>(.*)$/', '\1', $line);
+      // get the feature ID for this ID from the tripal_gff_temp table. It
+      // should be the name up to the first space
+      $id = preg_replace('/^>([^\s]+).*$/', '\1', $line);
       $residues = '';
       $residues = '';
     }
     }
     else {
     else {
@@ -2065,7 +2065,10 @@ function tripal_feature_load_gff3_fasta($fh, $interval, &$num_read, &$intv_read,
   else {
   else {
     // if we have a feature then add the residues
     // if we have a feature then add the residues
     $feature = $result[0];
     $feature = $result[0];
-    $values = array('residues' => $residues);
+    $values = array(
+      'residues' => $residues,
+      'seqlen' => strlen($residues)
+    );
     $match = array('feature_id' => $feature->feature_id);
     $match = array('feature_id' => $feature->feature_id);
     chado_update_record('feature', $match, $values);
     chado_update_record('feature', $match, $values);
   }
   }

+ 5 - 17
tripal_pub/includes/tripal_pub.pub_citation.inc

@@ -215,7 +215,7 @@ function tripal_pub_create_citation($pub) {
     if (array_key_exists('Authors', $pub)) {
     if (array_key_exists('Authors', $pub)) {
       $citation = $pub['Authors'] . '. ';
       $citation = $pub['Authors'] . '. ';
     }
     }
-    
+
     $citation .= $pub['Title'] .  '. ';
     $citation .= $pub['Title'] .  '. ';
 
 
     if (array_key_exists('Journal Name', $pub)) {
     if (array_key_exists('Journal Name', $pub)) {
@@ -260,7 +260,7 @@ function tripal_pub_create_citation($pub) {
     if (array_key_exists('Authors', $pub)) {
     if (array_key_exists('Authors', $pub)) {
       $citation = $pub['Authors'] . '. ';
       $citation = $pub['Authors'] . '. ';
     }
     }
-    
+
     $citation .= $pub['Title'] .  '. ';
     $citation .= $pub['Title'] .  '. ';
 
 
     if (array_key_exists('Journal Name', $pub)) {
     if (array_key_exists('Journal Name', $pub)) {
@@ -305,7 +305,7 @@ function tripal_pub_create_citation($pub) {
     if (array_key_exists('Authors', $pub)) {
     if (array_key_exists('Authors', $pub)) {
       $citation = $pub['Authors'] . '. ';
       $citation = $pub['Authors'] . '. ';
     }
     }
-    
+
     $citation .= $pub['Title'] .  '. ';
     $citation .= $pub['Title'] .  '. ';
 
 
     if (array_key_exists('Journal Name', $pub)) {
     if (array_key_exists('Journal Name', $pub)) {
@@ -326,7 +326,7 @@ function tripal_pub_create_citation($pub) {
     if (array_key_exists('Authors', $pub)) {
     if (array_key_exists('Authors', $pub)) {
       $citation = $pub['Authors'] . '. ';
       $citation = $pub['Authors'] . '. ';
     }
     }
-    
+
     $citation .= $pub['Title'] .  '. ';
     $citation .= $pub['Title'] .  '. ';
     if (array_key_exists('Journal Name', $pub)) {
     if (array_key_exists('Journal Name', $pub)) {
       $citation .= $pub['Journal Name'] . '. ';
       $citation .= $pub['Journal Name'] . '. ';
@@ -362,18 +362,6 @@ function tripal_pub_create_citation($pub) {
       $citation .= $pub['Pages'];
       $citation .= $pub['Pages'];
     }
     }
     $citation .= '.';
     $citation .= '.';
-  }
-  //----------------------
-  // Book
-  //----------------------
-  elseif ($pub_type == 'Book') {
-
-  }
-  //----------------------
-  // Book Chapter
-  //----------------------
-  elseif ($pub_type == 'Book Chapter') {
-
   }
   }
   //----------------------
   //----------------------
   // Conference Proceedings
   // Conference Proceedings
@@ -382,7 +370,7 @@ function tripal_pub_create_citation($pub) {
     if (array_key_exists('Authors', $pub)) {
     if (array_key_exists('Authors', $pub)) {
       $citation = $pub['Authors'] . '. ';
       $citation = $pub['Authors'] . '. ';
     }
     }
-    
+
     $citation .= $pub['Title'] .  '. ';
     $citation .= $pub['Title'] .  '. ';
     if (array_key_exists('Conference Name', $pub)) {
     if (array_key_exists('Conference Name', $pub)) {
       $citation .= $pub['Conference Name'] . '. ';
       $citation .= $pub['Conference Name'] . '. ';