Ver Fonte

Bug (Issue #2484067): Memory Leaks when cleaning-up orphaned chado content; Improved: patch supplied by nhenry with addition of form UI to allow Tripal admin to set the batch size.

nhenry há 9 anos atrás
pai
commit
cb7c1ea654
1 ficheiros alterados com 156 adições e 29 exclusões
  1. 156 29
      tripal_core/api/tripal_core.chado_nodes.api.inc

+ 156 - 29
tripal_core/api/tripal_core.chado_nodes.api.inc

@@ -133,36 +133,36 @@ function chado_node_get_base_table($content_type, $module = FALSE) {
 
 }
 
-/** 
+/**
  * @section
  * Common Functionality for Properties, Dbxrefs and relationships chado node API
  */
 
 /**
  * Validate the Triggering element from a node form.
- * 
+ *
  * We are going to inspect the post to determine what PHP knows is the triggering
  * element and if it doesn't agree with Drupal then we are actually going to
  * change it in Drupal.
- * 
+ *
  * This fixes an obscure bug triggered when a property is added and then
  * a relationship removed, Drupal thinks the first property remove button was
  * clicked and instead removes a property (not a relationship) and renders the new
  * property table in the relationship table page space.
- * 
+ *
  * NOTE: Many Drupal issues state that this problem is solved if the #name
  * of the button is unique (which it is in our case) but we are still experiencing
  * incorrectly determined triggering elements so we need to handle it ourselves.
  */
 function chado_validate_node_form_triggering_element($form, &$form_state) {
-  
+
   // We are going to inspect the post to determine what PHP knows is the triggering
   // element and if it doesn't agree with Drupal then we are actually going to
   // change it in Drupal.
   if ($_POST['_triggering_element_name'] != $form_state['triggering_element']['#name']) {
     $form_state['triggering_element']['#name'] = $_POST['_triggering_element_name'];
   }
-  
+
 }
 
 /**
@@ -221,7 +221,7 @@ function chado_add_node_form_subtables_add_button_submit($form, &$form_state) {
         break;
     }
   }
-  
+
   // This is needed to ensure the form builder function is called for the node
   // form in order for any of these changes to be seen.
   $form_state['rebuild'] = TRUE;
@@ -230,28 +230,28 @@ function chado_add_node_form_subtables_add_button_submit($form, &$form_state) {
 /**
  * Validate Removing Subtables entries from the node forms.
  * Supported subtables: Properties, Relationships, Additional DBxrefs.
- * 
+ *
  * Since Removing isn't associated with any user input the only thing we
  * need to validate is that Drupal has determined the triggering element correctly.
  * That said, we will call each subtables associated validate function just incase
  * there is some case-specific validation we do not know of or have not anticipated.
- * 
+ *
  * @param array $form
  * @param array $form_state
  */
 function chado_add_node_form_subtables_remove_button_validate($form, &$form_state) {
- 
+
   // We need to validate the trigerring element since Drupal has known
   // issues determining this correctly when there are multiple buttons
   // with the same label.
   chado_validate_node_form_triggering_element($form, $form_state);
-  
+
   // Based on triggering element call the correct validation function
   // ASUMPTION #1: each of the buttons must have property, dbxref or relationship
   // as the first part of the #name to uniquely identify the subsection.
   if (preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $matches)) {
     $subsection = $matches[1];
-    
+
       switch($subsection) {
       case 'properties':
         chado_add_node_form_properties_remove_button_validate($form, $form_state);
@@ -306,18 +306,18 @@ function chado_add_node_form_subtables_remove_button_submit($form, &$form_state)
  * @ingroup tripal_core
  */
 function chado_add_node_form_subtable_ajax_update($form, &$form_state) {
-  
+
   // We need to validate the trigerring element since Drupal has known
   // issues determining this correctly when there are multiple buttons
   // with the same label.
   chado_validate_node_form_triggering_element($form, $form_state);
-  
+
   // Based on triggering element render the correct part of the form.
   // ASUMPTION: each of the buttons must have property, dbxref or relationship
   // as the first part of the #name to uniquely identify the subsection.
   if (preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $matches)) {
     $subsection = $matches[1];
-    
+
     switch($subsection) {
       case 'properties':
         return $form['properties']['property_table'];
@@ -561,7 +561,13 @@ function chado_node_sync_form($form, &$form_state) {
         "\"orphaned\".  This can occur if a node in Drupal is " .
         "deleted but the corresponding chado records is not and/or vice " .
         "versa. Click the button below to resolve these discrepancies.</p>"),
-    '#weight' => 1,
+    '#weight' => -10,
+  );
+  $form['cleanup']['cleanup_batch_size'] = array(
+      '#type' => 'textfield',
+      '#title' => t('Batch Size'),
+      '#description' => t('The number of records to analyze together in a batch. If you are having memory issues you might want to decrease this number.'),
+      '#default_value' => variable_get('chado_node_api_cleanup_batch_size', 25000),
   );
   $form['cleanup']['button'] = array(
     '#type' => 'submit',
@@ -578,6 +584,26 @@ function chado_node_sync_form($form, &$form_state) {
   return $form;
 }
 
+/**
+ * Generic Sync Form Validate
+ *
+ * Normalizes the clean-up batch size submitted with the sync form. A blank or
+ * zero value is replaced by the default of 25,000 (with a warning). Anything
+ * that is not a number of at least 1 after rounding is rejected with a form
+ * error. Accepted values are written back to $form_state as a positive
+ * integer.
+ *
+ * @param array $form
+ * @param array $form_state
+ *
+ * @ingroup tripal_core
+ */
+function chado_node_sync_form_validate($form, &$form_state) {
+
+  $batch_size = isset($form_state['values']['cleanup_batch_size']) ? $form_state['values']['cleanup_batch_size'] : '';
+  if (is_string($batch_size)) {
+    $batch_size = trim($batch_size);
+  }
+
+  if (empty($batch_size)) {
+    // Blank and 0 both land here, so the message must not claim that the
+    // user typed a 0.
+    $form_state['values']['cleanup_batch_size'] = 25000;
+    drupal_set_message(t('The Batch Size for cleaning-up orphaned nodes was empty or 0. Since this is not valid, we reset it to the default of 25,000.'), 'warning');
+  }
+  elseif (!is_numeric($batch_size)) {
+    form_set_error('cleanup_batch_size', t('The batch size must be a positive whole number.'));
+  }
+  else {
+    // Round the value just to make sure. is_numeric() also accepts fractions
+    // and exponents, so the rounded value can still be 0 (e.g. "0.4") or too
+    // large for an integer; neither can be used as a batch size.
+    $rounded = abs(round($batch_size));
+    if ($rounded < 1 || $rounded >= PHP_INT_MAX) {
+      form_set_error('cleanup_batch_size', t('The batch size must be a positive whole number.'));
+    }
+    else {
+      $form_state['values']['cleanup_batch_size'] = (int) $rounded;
+    }
+  }
+}
+
 /**
  * Generic Sync Form Submit
  *
@@ -648,7 +674,8 @@ function chado_node_sync_form_submit($form, $form_state) {
   if (preg_match('/^Clean up orphaned/', $form_state['values']['op'])) {
     $module = $form_state['chado_node_api']['hook_prefix'];
     $base_table = $form_state['chado_node_api']['base_table'];
-    $job_args = array($base_table);
+    $job_args = array($base_table, $form_state['values']['cleanup_batch_size']);
+    variable_set('chado_node_api_cleanup_batch_size', $form_state['values']['cleanup_batch_size']);
     tripal_add_job($form_state['values']['op'], $module, 'chado_cleanup_orphaned_nodes', $job_args, $user->uid);
   }
 }
@@ -875,6 +902,48 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
   }
 }
 
+/**
+ * This function is a wrapper for the chado_cleanup_orphaned_nodes_part
+ * function. It breaks up the clean-up work into smaller batches that are more
+ * manageable for servers that may have low PHP memory settings.
+ *
+ * @param $table
+ *   The name of the table that corresponds to the node type we want to clean up.
+ * @param $nentries
+ *   The number of entries to parse at one time (ie: the batch size). Values
+ *   below 1 fall back to the default of 25000.
+ * @param $job_id
+ *   This should be the job id from the Tripal jobs system. It is passed on to
+ *   chado_cleanup_orphaned_nodes_part for every batch.
+ *
+ * @return
+ *   Always an empty string.
+ *
+ * @ingroup tripal_chado_node_api
+ */
+function chado_cleanup_orphaned_nodes($table, $nentries = 25000, $job_id = NULL) {
+
+  // The batch size is used as a divisor below and is handed to the part
+  // function as its batch size, so force it to a positive whole number.
+  $nentries = (int) $nentries;
+  if ($nentries < 1) {
+    $nentries = 25000;
+  }
+
+  // Find the number of nodes of type chado_$table and the number of entries
+  // in chado_$table; keep the larger of the two numbers so that every row of
+  // both tables falls inside some batch. fetchField() reads the first column
+  // regardless of how the database names the COUNT(*) column.
+  $num_nodes = db_query(
+    "SELECT COUNT(*) FROM {node} WHERE type = :type",
+    array(':type' => 'chado_' . $table)
+  )->fetchField();
+  $num_linked = db_query("SELECT COUNT(*) FROM {chado_" . $table . "}")->fetchField();
+  $count = max((int) $num_nodes, (int) $num_linked);
+
+  // OFFSET is zero-based, so batch $i starts at row $nentries * $i.
+  // The batches are processed from the last one back to the first because the
+  // part function deletes rows as it goes: removing rows from a later window
+  // does not shift the rows that belong to the earlier windows.
+  $m = (int) ceil($count / $nentries);
+  for ($i = $m - 1; $i >= 0; $i--) {
+    chado_cleanup_orphaned_nodes_part($table, $job_id, $nentries, $nentries * $i);
+  }
+
+  return '';
+}
+
 /**
  * This function will delete Drupal nodes for any sync'ed table (e.g.
  * feature, organism, analysis, stock, library) if the chado record has been
@@ -888,25 +957,19 @@ function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id =
  *
  * @ingroup tripal_chado_node_api
  */
-function chado_cleanup_orphaned_nodes($table, $job_id = NULL) {
+function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries, $offset) {
   $count = 0;
 
+  // Change this variable to TRUE to print debugging values for memory leaks.
+  $debug_memory_leak = FALSE;
+
   // build the SQL statments needed to check if nodes point to valid analyses
-  $dsql = "SELECT * FROM {node} WHERE type = 'chado_" . $table . "' order by nid";
+  $dsql = "SELECT * FROM {node} WHERE type = 'chado_" . $table . "' ORDER BY nid LIMIT $nentries OFFSET $offset";
   $nsql = "SELECT * FROM {node} WHERE nid = :nid";
   $csql = "SELECT * FROM {chado_" . $table . "} WHERE nid = :nid ";
-  $clsql= "SELECT * FROM {chado_" . $table . "}";
+  $clsql= "SELECT * FROM {chado_" . $table . "} ORDER BY nid LIMIT $nentries OFFSET $offset";
   $lsql = "SELECT * FROM {" . $table . "} where " . $table . "_id = :" . $table . "_id ";
 
-  // load into nodes array
-  print "Getting nodes\n";
-  $nodes = array();
-  $res = db_query($dsql);
-  foreach ($res as $node) {
-    $nodes[$count] = $node;
-    $count++;
-  }
-
   // load the chado_$table into an array
   print "Getting chado_$table\n";
   $cnodes = array();
@@ -915,6 +978,14 @@ function chado_cleanup_orphaned_nodes($table, $job_id = NULL) {
     $cnodes[$count] = $node;
     $count++;
   }
+
+  // Free $res.
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $res = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing res: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+
   $interval = intval($count * 0.01);
   if ($interval < 1) {
     $interval = 1;
@@ -954,11 +1025,50 @@ function chado_cleanup_orphaned_nodes($table, $job_id = NULL) {
     }
     $i++;
   }
+
+  // Freeing up various resources: $cnodes, $node, $record, and $results.
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $cnodes = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing cnodes: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $results = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing results: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $node = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing node: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $record = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing record: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+
   print "\t$deleted chado_$table entries missing either a node or chado entry.\n";
 
   // iterate through all of the nodes and delete those that don't
   // have a corresponding entry in chado_$table
   $deleted = 0;
+  // load into nodes array
+  print "Getting nodes\n";
+  $nodes = array();
+  $res = db_query($dsql);
+  foreach ($res as $node) {
+    $nodes[$count] = $node;
+    $count++;
+  }
+
+  // Free $res.
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $res = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing res:\n" . ($mu - memory_get_usage()) ." bytes\n";
+  }
+
   foreach ($nodes as $node) {
 
     // update the job status every 1% libraries
@@ -984,6 +1094,23 @@ function chado_cleanup_orphaned_nodes($table, $job_id = NULL) {
     }
     $i++;
   }
+  // Freeing up various resources: $results, $link and $nodes.
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $results = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing results: " . ($mu - memory_get_usage()) . " bytes\n";
+  }
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $link = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing link: " . ($mu - memory_get_usage()) . " bytes\n";
+  }
+  $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
+  $nodes = NULL;
+  if ($debug_memory_leak) {
+    print "\tFreeing nodes: " . ($mu - memory_get_usage()) ." bytes\n";
+  }
+
   print "\t$deleted nodes did not have corresponding chado_$table entries.\n";
 
   return '';