Răsfoiți Sursa

Performance thoughts+timing for publishing entities.

Lacey Sanderson 6 ani în urmă
părinte
comite
1425c21b94
1 a modificat fișierele cu 46 adăugiri și 5 ștergeri
  1. 46 5
      tripal_chado/api/tripal_chado.api.inc

+ 46 - 5
tripal_chado/api/tripal_chado.api.inc

@@ -4,7 +4,7 @@
  * @file
  *
  * This file contains miscellaneous API functions specific to working with
- * records in Chado that do not have a home in any other sub category of 
+ * records in Chado that do not have a home in any other sub category of
  * API functions.
  */
 
@@ -12,9 +12,9 @@
  * @defgroup tripal_chado_api Chado
  *
  * @ingroup tripal_api
- * The Tripal Chado API is a set of functions for interacting with data 
+ * The Tripal Chado API is a set of functions for interacting with data
  * inside of a Chado relational database. Entities (or pages) in Drupal
- * that are provided by Tripal can supply data from any supported database 
+ * that are provided by Tripal can supply data from any supported database
  * back-end, and Chado is the default. This API contains a variety of sub
  * categories (or groups) where functions are organized.  Any extension module
  * that desires to work with data in Chado will find these functions useful.
@@ -39,6 +39,9 @@
  */
 function chado_publish_records($values, $job_id = NULL) {
 
+  // @performance remove after development
+  $started_at = microtime(true);
+
   // We want the job object in order to report progress.
   if (is_object($job_id)) {
     $job = $job_id;
@@ -65,6 +68,8 @@ function chado_publish_records($values, $job_id = NULL) {
   $filters = array_key_exists('filters', $values) ? $values['filters'] : array();
   $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';
 
+  // @performance remove after development: 0.00059294700622559s
+
   // Load the bundle entity so we can get information about which Chado
   // table/field this entity belongs to.
   $bundle = tripal_load_bundle_entity(array('name' => $bundle_name));
@@ -76,6 +81,7 @@ function chado_publish_records($values, $job_id = NULL) {
   }
   $chado_entity_table = chado_get_bundle_entity_table($bundle);
 
+  // @performance remove after development: 0.05065393447876s
 
   // Get the mapping of the bio data type to the Chado table.
   $chado_bundle = db_select('chado_bundle', 'cb')
@@ -95,11 +101,16 @@ function chado_publish_records($values, $job_id = NULL) {
   $cvterm_id  = $chado_bundle->type_id;
   $type_value = $chado_bundle->type_value;
 
+  // @performance remove after development:0.051163911819458s
+
   // Get the table information for the Chado table.
   $table_schema = chado_get_schema($table);
   $pkey_field = $table_schema['primary key'][0];
 
+  // @performance remove after development:0.05134105682373s
+
   // Construct the SQL for identifying which records should be published.
+  // @performance find a way to optimize this?
   $args = array();
   $select = "SELECT T.$pkey_field as record_id ";
   $from = "
@@ -181,7 +192,11 @@ function chado_publish_records($values, $job_id = NULL) {
       }
     }
   }
+
+  // @performance remove after development:0.060441970825195s
+
   // First get the count
+  // @performance optimize, estimate or remove this. It's only used for reporting progress on the command-line.
   $sql = "SELECT count(*) as num_records " . $from . $where;
   $result = chado_query($sql, $args);
   $count = $result->fetchField();
@@ -192,9 +207,21 @@ function chado_publish_records($values, $job_id = NULL) {
     $interval = 1;
   }
 
+  // @performance remove after development:0.25212502479553s
+  print 'Count amount to do :' . (microtime(true) - $started_at) . "s.\n";
+
   // Perform the query.
   $sql = $select . $from . $where;
   $records = chado_query($sql, $args);
+
+  // @performance remove after development:0.43729090690613s
+  print 'Perform Query :' . (microtime(true) - $started_at) . "s.\n";
+
+  // @performance evaluate this transaction. Long running transactions can have serious
+  // performance issues in PostgreSQL. One option is to move the transaction within the
+  // loop so that each one is not very long but then we end up with more overhead creating
+  // transactions. A better albeit more complicated approach might be to break the job into
+  // chunks where each one is a single transaction.
   $transaction = db_transaction();
 
   print "\nNOTE: publishing records is performed using a database transaction. \n" .
@@ -206,6 +233,9 @@ function chado_publish_records($values, $job_id = NULL) {
   try {
     while($record = $records->fetchObject()) {
 
+      // @performance remove after development
+      print 'Start current entity :' . (microtime(true) - $started_at) . "s.\n";
+
       // update the job status every interval
       if ($i % $interval == 0) {
         $complete = ($i / $count) * 33.33333333;
@@ -215,6 +245,8 @@ function chado_publish_records($values, $job_id = NULL) {
       }
 
       // First save the tripal_entity record.
+      // @performance This is likely a bottleneck. Too bad we can't create
+      // multiple entities at once... sort of like the copy method.
       $record_id = $record->record_id;
       $ec = entity_get_controller('TripalEntity');
       $entity = $ec->create(array(
@@ -223,6 +255,7 @@ function chado_publish_records($values, $job_id = NULL) {
         // Add in the Chado details for when the hook_entity_create()
         // is called and our tripal_chado_entity_create() implementation
         // can deal with it.
+        // @performance maybe there is something we can easily do here?
         'chado_record' => chado_generate_var($table, array($pkey_field => $record_id)),
         'chado_record_id' => $record_id,
         'publish' => TRUE,
@@ -232,6 +265,9 @@ function chado_publish_records($values, $job_id = NULL) {
         throw new Exception('Could not create entity.');
       }
 
+        // @performance remove after development: this takes 0.2-0.3s.
+        //print 'Create entity itself :' . (microtime(true) - $started_at) . "s.\n";
+
       // Next save the chado entity record.
       $entity_record = array(
         'entity_id' => $entity->id,
@@ -250,6 +286,9 @@ function chado_publish_records($values, $job_id = NULL) {
         throw new Exception('Could not create mapping of entity to Chado record.');
       }
 
+      // @performance remove after development: this takes <0.001s.
+      // print 'Relate back to chado :' . (microtime(true) - $started_at) . "s.\n";
+
       $i++;
     }
   }
@@ -261,6 +300,8 @@ function chado_publish_records($values, $job_id = NULL) {
     return FALSE;
   }
   drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
+    // @performance remove after development
+  print 'Complete :' . (microtime(true) - $started_at) . "s.\n";
   return TRUE;
 }
 
@@ -271,7 +312,7 @@ function chado_publish_records($values, $job_id = NULL) {
  *    The name of a base table in Chado.
  * @return
  *    An array of tokens where the key is the machine_name of the token.
- * 
+ *
  * @ingroup tripal_chado_api
  */
 function chado_get_tokens($base_table) {
@@ -329,7 +370,7 @@ function chado_get_tokens($base_table) {
  *
  * @return
  *   The string will all tokens replaced with values.
- * 
+ *
  *  @ingroup tripal_chado_api
  */
 function chado_replace_tokens($string, $record) {