@@ -4,7 +4,7 @@
  * @file
  *
  * This file contains miscellaneous API functions specific to working with
- * records in Chado that do not have a home in any other sub category of
+ * records in Chado that do not have a home in any other sub category of
  * API functions.
  */
@@ -12,9 +12,9 @@
  * @defgroup tripal_chado_api Chado
  *
  * @ingroup tripal_api
- * The Tripal Chado API is a set of functions for interacting with data
+ * The Tripal Chado API is a set of functions for interacting with data
  * inside of a Chado relational database. Entities (or pages) in Drupal
- * that are provided by Tripal can supply data from any supported database
+ * that are provided by Tripal can supply data from any supported database
  * back-end, and Chado is the default. This API contains a variety of sub
  * categories (or groups) where functions are organized. Any extension module
  * that desires to work with data in Chado will find these functions useful.
@@ -39,6 +39,9 @@
  */
 function chado_publish_records($values, $job_id = NULL) {

+  // @performance remove after development
+  $started_at = microtime(true);
+
   // We want the job object in order to report progress.
   if (is_object($job_id)) {
     $job = $job_id;
@@ -65,6 +68,8 @@ function chado_publish_records($values, $job_id = NULL) {
   $filters = array_key_exists('filters', $values) ? $values['filters'] : array();
   $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';

+  // @performance remove after development: 0.00059294700622559s
+
   // Load the bundle entity so we can get information about which Chado
   // table/field this entity belongs to.
   $bundle = tripal_load_bundle_entity(array('name' => $bundle_name));
@@ -76,6 +81,7 @@ function chado_publish_records($values, $job_id = NULL) {
   }
   $chado_entity_table = chado_get_bundle_entity_table($bundle);

+  // @performance remove after development: 0.05065393447876s

   // Get the mapping of the bio data type to the Chado table.
   $chado_bundle = db_select('chado_bundle', 'cb')
@@ -95,11 +101,16 @@ function chado_publish_records($values, $job_id = NULL) {
   $cvterm_id = $chado_bundle->type_id;
   $type_value = $chado_bundle->type_value;

+  // @performance remove after development:0.051163911819458s
+
   // Get the table information for the Chado table.
   $table_schema = chado_get_schema($table);
   $pkey_field = $table_schema['primary key'][0];

+  // @performance remove after development:0.05134105682373s
+
   // Construct the SQL for identifying which records should be published.
+  // @performance find a way to optimize this?
   $args = array();
   $select = "SELECT T.$pkey_field as record_id ";
   $from = "
@@ -181,7 +192,11 @@ function chado_publish_records($values, $job_id = NULL) {
       }
     }
   }
+
+  // @performance remove after development:0.060441970825195s
+
   // First get the count
+  // @performance optimize, estimate or remove this. It's only used for reporting progress on the command-line.
   $sql = "SELECT count(*) as num_records " . $from . $where;
   $result = chado_query($sql, $args);
   $count = $result->fetchField();
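For the "estimate" option raised in the comment above, one possible sketch (not part of this patch) is to ask the PostgreSQL planner for its row estimate rather than scanning the table with count(*). This assumes PostgreSQL 9.0+ for EXPLAIN (FORMAT JSON), reuses the $from/$where strings built earlier, and is restricted to the no-filter case so that no bound parameters end up inside the EXPLAIN statement. A rough estimate is acceptable here because the count only drives the progress display.

  // Hypothetical sketch, in place of the count(*) query above: use the
  // planner's row estimate when no filter parameters are bound.
  if (empty($args)) {
    $explain = chado_query("EXPLAIN (FORMAT JSON) SELECT 1 " . $from . $where)->fetchField();
    $plan = json_decode($explain, TRUE);
    $count = isset($plan[0]['Plan']['Plan Rows']) ? (int) $plan[0]['Plan']['Plan Rows'] : NULL;
  }
  if (empty($count)) {
    // Fall back to the exact count when filters are present or the plan
    // could not be decoded.
    $count = chado_query("SELECT count(*) as num_records " . $from . $where, $args)->fetchField();
  }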
@@ -192,9 +207,21 @@ function chado_publish_records($values, $job_id = NULL) {
     $interval = 1;
   }

+  // @performance remove after development:0.25212502479553s
+  print 'Count amount to do :' . (microtime(true) - $started_at) . "s.\n";
+
   // Perform the query.
   $sql = $select . $from . $where;
   $records = chado_query($sql, $args);
+
+  // @performance remove after development:0.43729090690613s
+  print 'Perform Query :' . (microtime(true) - $started_at) . "s.\n";
+
+  // @performance evaluate this transaction. Long-running transactions can have serious
+  // performance issues in PostgreSQL. One option is to move the transaction inside the
+  // loop so that each one stays short, but then we incur more overhead creating
+  // transactions. A better, albeit more complicated, approach might be to break the job
+  // into chunks where each chunk is a single transaction.
   $transaction = db_transaction();

   print "\nNOTE: publishing records is performed using a database transaction. \n" .
@@ -206,6 +233,9 @@ function chado_publish_records($values, $job_id = NULL) {
   try {
     while($record = $records->fetchObject()) {

+      // @performance remove after development
+      print 'Start current entity :' . (microtime(true) - $started_at) . "s.\n";
+
       // update the job status every interval
       if ($i % $interval == 0) {
         $complete = ($i / $count) * 33.33333333;
@@ -215,6 +245,8 @@ function chado_publish_records($values, $job_id = NULL) {
       }

       // First save the tripal_entity record.
+      // @performance This is likely a bottleneck. Too bad we can't create
+      // multiple entities at once... sort of like the copy method.
       $record_id = $record->record_id;
       $ec = entity_get_controller('TripalEntity');
       $entity = $ec->create(array(
@@ -223,6 +255,7 @@ function chado_publish_records($values, $job_id = NULL) {
         // Add in the Chado details for when the hook_entity_create()
         // is called and our tripal_chado_entity_create() implementation
         // can deal with it.
+        // @performance maybe there is something we can easily do here?
         'chado_record' => chado_generate_var($table, array($pkey_field => $record_id)),
         'chado_record_id' => $record_id,
         'publish' => TRUE,
@@ -232,6 +265,9 @@ function chado_publish_records($values, $job_id = NULL) {
         throw new Exception('Could not create entity.');
       }

+      // @performance remove after development: this takes 0.2-0.3s.
+      //print 'Create entity itself :' . (microtime(true) - $started_at) . "s.\n";
+
       // Next save the chado entity record.
       $entity_record = array(
         'entity_id' => $entity->id,
@@ -250,6 +286,9 @@ function chado_publish_records($values, $job_id = NULL) {
         throw new Exception('Could not create mapping of entity to Chado record.');
       }

+      // @performance remove after development: this takes <0.001s.
+      // print 'Relate back to chado :' . (microtime(true) - $started_at) . "s.\n";
+
       $i++;
     }
   }
@@ -261,6 +300,8 @@ function chado_publish_records($values, $job_id = NULL) {
     return FALSE;
   }
   drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
+  // @performance remove after development
+  print 'Complete :' . (microtime(true) - $started_at) . "s.\n";
   return TRUE;
 }

@@ -271,7 +312,7 @@ function chado_publish_records($values, $job_id = NULL) {
  *   The name of a base table in Chado.
  * @return
  *   An array of tokens where the key is the machine_name of the token.
- *
+ *
  * @ingroup tripal_chado_api
  */
 function chado_get_tokens($base_table) {
@@ -329,7 +370,7 @@ function chado_get_tokens($base_table) {
  *
  * @return
  *   The string will all tokens replaced with values.
- *
+ *
  * @ingroup tripal_chado_api
  */
 function chado_replace_tokens($string, $record) {