Эх сурвалжийг харах

Fixed memory leaks in publishing content and memory overload in sequence display

Stephen Ficklin 8 жил өмнө
parent
commit
ef93343397

+ 7 - 12
tripal/api/tripal.entities.api.inc

@@ -783,7 +783,6 @@ function tripal_replace_entity_tokens($string, &$entity, $bundle_entity = NULL)
       foreach ($fields as $field) {
         $field_ids[$field['id']] = array($entity->id);
       }
-
       $entities = array($entity->id => $entity);
       module_invoke($storage['module'], 'field_storage_load', 'TripalEntity',
         $entities, FIELD_LOAD_CURRENT, $field_ids, array());
@@ -794,17 +793,13 @@ function tripal_replace_entity_tokens($string, &$entity, $bundle_entity = NULL)
   foreach($used_tokens as $token) {
     $field_name = str_replace(array('.','[',']'), array('__','',''), $token);
     $value = '';
-    if (isset($entity->{$field_name})) {
-
-      // Render the value from the field.
-      // First get the items to be rendered.
-      $field_value = field_get_items('TripalEntity', $entity, $field_name);
-      if (isset($field_value[0])) {
-        // Then get a render array for just the value of the first item (no markup).
-        $field_render_arr = field_view_value('TripalEntity', $entity, $field_name, $field_value[0]);
-        // Finally render the value from the render array.
-        $value = render($field_render_arr);
-      }
+
+    if (property_exists($entity, $field_name)) {
+      // Note: there is a memory leak in field_get_items() so we can't use it
+      // here or bulk publising will slowly erode memory.
+      //$field_value = field_get_items('TripalEntity', $entity, $field_name);
+      $value = $entity->{$field_name}['und'][0]['value'];
+      // TODO: deal with the value when it is not a scalar.
     }
     // The TripalBundle__bundle_id is a special token for substituting the
     // bundle id.

+ 1 - 1
tripal/includes/TripalEntityUIController.inc

@@ -212,8 +212,8 @@ function tripal_view_entity($entity, $view_mode = 'full') {
      //$query->fieldCondition('content_type', 'content_type', 'organism');
      //$query->fieldOrderBy('content_type', 'content_type', 'DESC');
      //$query->fieldCondition('organism__genus', 'organism__genus', 'Oryza');
-
    }
+   $query->range(0, 25);
    $results = $query->execute();
 
    $headers = array('Title', 'Vocabulary', 'Term', 'Author', 'Status', 'Updated', 'Operations');

+ 23 - 3
tripal_chado/api/tripal_chado.api.inc

@@ -104,14 +104,34 @@ function tripal_chado_publish_records($values, $job_id = NULL) {
     $where .= "AND $column = $cvterm_id";
   }
 
+  // First get the count
+  $sql = "SELECT count(*) as num_records " . $from . $where;
+    $result = chado_query($sql);
+  $count = $result->fetchField();
+
+  // calculate the interval for updates
+  $interval = intval($count / 1000);
+  if ($interval < 1) {
+    $interval = 1;
+  }
+
   // Perform the query.
   $sql = $select . $from . $where;
   $records = chado_query($sql);
-  $num_published = 0;
   $transaction  = db_transaction();
+
+  printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, 0, number_format(memory_get_usage()));
   try {
+    $i = 0;
     while($record = $records->fetchObject()) {
 
+      // update the job status every interval
+      //if ($jobid and $i % $interval == 0) {
+        $complete = ($i / $count) * 33.33333333;
+        //tripal_set_job_progress($jobid, intval($complete + 33.33333333));
+        printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
+      //}
+
       // First save the tripal_entity record.
       $record_id = $record->record_id;
       $ec = entity_get_controller('TripalEntity');
@@ -142,7 +162,7 @@ function tripal_chado_publish_records($values, $job_id = NULL) {
       }
       $success = drupal_write_record('chado_entity', $entity_record);
 
-      $num_published++;
+      $i++;
     }
   }
   catch (Exception $e) {
@@ -152,7 +172,7 @@ function tripal_chado_publish_records($values, $job_id = NULL) {
     drupal_set_message('Failed publishing record. See recent logs for more details.', 'error');
     return FALSE;
   }
-  drupal_set_message("Succesfully published $num_published " . $bundle->label . " record(s).");
+  drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
   return TRUE;
 }
 

+ 40 - 50
tripal_chado/includes/fields/chado_feature__residues.inc

@@ -260,34 +260,29 @@ class chado_feature__residues extends TripalField {
     }
     $num_seqs++;
 
-    // Add in the protein sequences
-    $values = array(
-      'object_id' => $feature->feature_id,
-      'subject_id' => array(
-        'type_id' => array(
-          'name' => 'polypeptide'
-        ),
-      ),
-      'type_id' => array(
-        'name' => 'derives_from',
-      ),
-    );
-    $options = array(
-      'return_array' => 1,
-      'include_fk' => array(
-        'subject_id'  => 1
-      ),
-    );
-    $protein_rels = chado_generate_var('feature_relationship', $values, $options);
-    foreach ($protein_rels as $protein_rel) {
-      $protein_rel = chado_expand_var($protein_rel, 'field', 'feature.residues');
-      if ($protein_rel->subject_id->residues) {
+    // Add in the protein sequences. It's faster to provide the SQL rather than
+    // to use chado_generate_var based on the type.
+    $sql = "
+      SELECT F.*
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} F on FR.subject_id = F.feature_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
+      WHERE
+        FR.object_id = :feature_id and
+        CVT.name = 'polypeptide' and
+        RCVT.name = 'derives_from'
+      ORDER BY FR.rank ASC
+    ";
+    $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
+    while ($protein = $results->fetchObject()) {
+      if ($protein->residues) {
         $entity->{$field_name}['und'][$num_seqs++]['value'] = array(
           '@type' => 'SO:0000104',
           'type' => 'polypeptide',
           'label' => 'Protein Sequence',
-          'defline' => tripal_get_fasta_defline($protein_rel->subject_id, '', NULL, '', strlen($protein_rel->subject_id->residues)),
-          'residues' => $protein_rel->subject_id->residues,
+          'defline' => tripal_get_fasta_defline($protein, '', NULL, '', strlen($protein->residues)),
+          'residues' => $protein->residues,
         );
       }
     }
@@ -306,34 +301,28 @@ class chado_feature__residues extends TripalField {
       ),
     );
     $feature = chado_expand_var($feature, 'table', 'featureloc', $options);
-
     $featureloc_sequences = self::get_featureloc_sequences($feature->feature_id, $feature->featureloc->feature_id);
 
-    // Add in the coding sequences.
-    $values = array(
-      'object_id' => $feature->feature_id,
-      'subject_id' => array(
-        'type_id' => array(
-          'name' => 'CDS'
-        ),
-      ),
-      'type_id' => array(
-        'name' => 'part_of',
-      ),
-    );
-    $options = array(
-      'order_by' => array('rank' => 'ASC'),
-      'return_array' => 1,
-      'include_fk' => array(
-        'subject_id'  => 1
-      ),
-    );
-    $cds_rels = chado_generate_var('feature_relationship', $values, $options);
+    // Add in the coding sequences. It's faster to provide the SQL rather than
+    // to use chado_generate_var based on the type.
+    $sql = "
+      SELECT F.*
+      FROM {feature_relationship} FR
+        INNER JOIN {feature} F on FR.subject_id = F.feature_id
+        INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+        INNER JOIN {cvterm} RCVT on RCVT.cvterm_id = FR.type_id
+        INNER JOIN {featureloc} FL on FL.feature_id = F.feature_id
+      WHERE
+        FR.object_id = :feature_id and
+        CVT.name = 'CDS' and
+        RCVT.name = 'part_of'
+      ORDER BY FR.rank ASC
+    ";
+    $results = chado_query($sql, array(':feature_id' => $feature->feature_id));
     $coding_seq = '';
-    foreach ($cds_rels as $cds_rel) {
-      $cds_rel = chado_expand_var($cds_rel, 'field', 'feature.residues');
-      if ($cds_rel->subject_id->residues) {
-        $coding_seq .= $cds_rel->subject_id->residues;
+    while ($CDS = $results->fetchObject) {
+      if ($CDS->residues) {
+        $coding_seq .= $CDS->residues;
       }
     }
     if ($coding_seq) {
@@ -341,7 +330,7 @@ class chado_feature__residues extends TripalField {
         '@type' => 'SO:0000316',
         'type' => 'coding_sequence',
         'label' => 'Coding sequence (CDS)',
-        'defline' => tripal_get_fasta_defline($feature, '', $feature->featureloc->feature_id[0], '', strlen($coding_seq)),
+        'defline' => tripal_get_fasta_defline($feature, 'CDS', NULL, '', strlen($coding_seq)),
         'residues' => $coding_seq,
       );
     }
@@ -378,6 +367,7 @@ class chado_feature__residues extends TripalField {
           'is_html' => 0
         )
       );
+
       if (count($cds_sequence) > 0) {
         // the tripal_get_feature_sequences() function can return multiple sequences
         // if a feature is aligned to multiple places. In the case of CDSs we expect

+ 1 - 1
tripal_chado/includes/fields/chado_linker__expression.inc

@@ -210,7 +210,7 @@ class chado_linker__expression extends TripalField {
       'sticky' => FALSE,
       'caption' => "",
       'colgroups' => array(),
-      'empty' => 'There are no curated expression data.',
+      'empty' => 'There is no curated expression data.',
     );
     $content = theme_table($table);
     if (count($items) > 0) {

+ 1 - 1
tripal_chado/includes/loaders/tripal_chado.obo_loader.inc

@@ -101,7 +101,7 @@ function tripal_cv_obo_form($form, &$form_state) {
       $matches = array();
       if (preg_match('/\{(.*?)\}/', $uobo_file, $matches)) {
         $modpath = drupal_get_path('module', $matches[1]);
-        $uobo_file = 'preg_replace('/\{.*?\}/', $modpath, $uobo_file);
+        $uobo_file = preg_replace('/\{.*?\}/', $modpath, $uobo_file);
       }
     }
     // We don't want the previous value to remain. We want the new default to