Kaynağa Gözat

Merge pull request #1139 from dsenalik/20201217sbo__database_cross_reference

Tripal field sbo__database_cross_reference timeout fix
Lacey-Anne Sanderson 4 yıl önce
ebeveyn
işleme
bdb0233407

+ 61 - 20
tripal_chado/api/modules/tripal_chado.pub.api.inc

@@ -105,7 +105,7 @@ function chado_get_publication($identifiers, $options = []) {
     }
     else {
       tripal_report_error('tripal_pub_api', TRIPAL_ERROR,
-        "chado_get_publication: The dbxref identifier is not correctly formatted.",
+        "chado_get_publication: The dbxref identifier is not correctly formatted. Identifiers passed: %identifier.",
         ['%identifier' => print_r($identifiers, TRUE)]
       );
     }
@@ -1169,6 +1169,47 @@ function chado_pub_create_citation($pub) {
   return $citation;
 }
 
+/**
+ * Retrieves all database cross references of a publication.
+ *
+ * Implemented as a direct SQL query for performance reasons, because
+ * chado_expand_var() can take too long as it loads more data than needed.
+ *
+ * @param $pub_id
+ *   A pub_id from the 'chado.pub' table.
+ *
+ * @return
+ *   An array with one entry per cross reference, in query order. Each entry
+ *   is an associative array with the keys 'accession', 'version',
+ *   'description' (columns of the 'dbxref' table) and 'name', 'url',
+ *   'urlprefix' (columns of the 'db' table). Empty if nothing is linked.
+ *
+ * @ingroup tripal_pub_api
+ */
+function chado_get_pub_dbxrefs($pub_id) {
+  // The linking column is fixed for pub_dbxref, so it is written directly
+  // into the query; only the pub_id value is passed as a placeholder.
+  $sql = "SELECT REF.accession, REF.version, REF.description, DB.name, DB.url, DB.urlprefix "
+       . "FROM {pub_dbxref} LINK "
+       . "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
+       . "INNER JOIN {db} DB on REF.db_id = DB.db_id "
+       . "WHERE LINK.pub_id = :pub_id";
+  $records = chado_query($sql, [':pub_id' => $pub_id]);
+
+  $results = [];
+  while ($record = $records->fetchObject()) {
+    $results[] = [
+      'accession' => $record->accession,
+      'version' => $record->version,
+      'description' => $record->description,
+      'name' => $record->name,
+      'url' => $record->url,
+      'urlprefix' => $record->urlprefix,
+    ];
+  }
+  return $results;
+}
+
 /**
  * Retrieves the minimal information to uniquely describe any publication.
  *
@@ -1238,17 +1279,24 @@ function chado_get_minimal_pub_info($pub) {
     }
   }
 
+  // Load all database cross references.
+  $pub_dbxrefs = chado_get_pub_dbxrefs($pub->pub_id);
+
   // Get the first database cross-reference with a url.
-  $options = ['return_array' => 1];
-  $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
-  $dbxref = NULL;
-  if ($pub->pub_dbxref) {
-    foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
-      if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
-        $dbxref = $pub_dbxref->dbxref_id;
-      }
-    }
-  }
+// It is not clear what this block was doing: it would have retrieved the last, not
+// the first, dbxref with a url, but the variable $dbxref is not referenced later. It
+// could have added information to $pub, but that does not appear to be referenced later
+// either. chado_expand_var() can sometimes take a long time to execute, so just remove it?
+//  $options = ['return_array' => 1];
+//  $pub = chado_expand_var($pub, 'table', 'pub_dbxref', $options);
+//  $dbxref = NULL;
+//  if ($pub->pub_dbxref) {
+//    foreach ($pub->pub_dbxref as $index => $pub_dbxref) {
+//      if ($pub_dbxref->dbxref_id->db_id->urlprefix) {
+//        $dbxref = $pub_dbxref->dbxref_id;
+//      }
+//    }
+//  }
 
   // Get the URL.
   $values = [
@@ -1270,17 +1318,10 @@ function chado_get_minimal_pub_info($pub) {
     }
   }
 
-  // Get the list of database cross references.
-  $values = [
-    'pub_id' => $pub->pub_id,
-  ];
-  $options = [
-    'return_array' => 1,
-  ];
-  $pub_dbxrefs = chado_generate_var('pub_dbxref', $values, $options);
+  // Generate a list of database cross references formatted as "DB:accession".
   $dbxrefs = [];
   foreach ($pub_dbxrefs as $pub_dbxref) {
-    $dbxrefs[] = $pub_dbxref->dbxref_id->db_id->name . ':' . $pub_dbxref->dbxref_id->accession;
+    $dbxrefs[] = $pub_dbxref['name'] . ':' . $pub_dbxref['accession'];
   }
 
   // Get the citation.

+ 36 - 26
tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference.inc

@@ -37,6 +37,10 @@ class sbo__database_cross_reference extends ChadoField {
     // type. This will create form elements when editing the field instance
     // to allow the site admin to change the term settings above.
     'term_fixed' => FALSE,
+    // The number of items to show on a page.
+    'items_per_page' => 10,
+    // Upper limit on the number of items to show when there are many cross references.
+    'max_items' => 10000,
   ];
 
   // The default widget for this field.
@@ -138,33 +142,39 @@ class sbo__database_cross_reference extends ChadoField {
 
     $linker_table = $base_table . '_dbxref';
     $options = ['return_array' => 1];
-    $record = chado_expand_var($record, 'table', $linker_table, $options);
-    if (property_exists($record, $linker_table) and is_array($record->$linker_table) and count($record->$linker_table) > 0) {
-      $i = 0;
-      foreach ($record->$linker_table as $index => $linker) {
-        $dbxref = $linker->dbxref_id;
-
-        // Ignore the GFF_source database. This is a weird thing required by
-        // GBrowse and is added by the GFF loader. We don't want to show it.
-        if ($dbxref->db_id->name == 'GFF_source') {
-          continue;
-        }
-
-        $URL = chado_get_dbxref_url($dbxref);
-        $entity->{$field_name}['und'][$i] = [
-          'value' => [
-            $dbname_term => $dbxref->db_id->name,
-            $accession_term => $dbxref->accession,
-            $dburl_term => $URL,
-          ],
-          'chado-' . $field_table . '__' . $pkey => $linker->$pkey,
-          'chado-' . $field_table . '__' . $fkey_lcolumn => $linker->$fkey_lcolumn->$fkey_lcolumn,
-          'chado-' . $field_table . '__dbxref_id' => $dbxref->dbxref_id,
-          'db_id' => $dbxref->db_id->db_id,
-          'accession' => $dbxref->accession,
-        ];
-        $i++;
+
+    // Build the SQL to find the cross references linked to this record.
+    $max_items = array_key_exists('max_items', $this->instance['settings']) ? $this->instance['settings']['max_items'] : 10000;
+    $sql = "SELECT REF.accession, DB.name, DB.urlprefix "
+         . "FROM {".$linker_table."} LINK "
+         . "INNER JOIN {dbxref} REF on LINK.dbxref_id = REF.dbxref_id "
+         . "INNER JOIN {db} DB on REF.db_id = DB.db_id "
+         . "WHERE LINK.$fkey_lcolumn = :id "
+         // Ignore the GFF_source database. This is a weird thing required by
+         // GBrowse and is added by the GFF loader. We don't want to show it.
+         . "AND NOT DB.name = 'GFF_source' "
+         . "ORDER BY REF.accession "  // if we hit the limit, the subset should be consistent
+         . "LIMIT :limit";
+    $args = [':id' => $entity->{'chado_record_id'},
+             ':limit' => $max_items + 1];
+    $records = chado_query($sql, $args);
+
+    // Store the query results
+    $delta = 0;
+    while($record = $records->fetchObject()) {
+      // One extra record (max_items + 1) is requested only to detect overflow, so it is not stored.
+      if ($delta < $max_items) {
+        $entity->{$field_name}['und'][$delta] = [];
+        $entity->{$field_name}['und'][$delta]['value'][$dbname_term] = $record->name;
+        $entity->{$field_name}['und'][$delta]['value'][$accession_term] = $record->accession;
+        $entity->{$field_name}['und'][$delta]['value'][$dburl_term] = $record->urlprefix;
       }
+      $delta++;
+    }
+    // Display a warning if we have exceeded the maximum number of cross references
+    if ( $delta > $max_items ) {
+      $entity->{$field_name}['und'][$delta]['value'][$dbname_term] = 'Note';
+      $entity->{$field_name}['und'][$delta]['value'][$accession_term] = "Only the first $max_items cross references are shown";
     }
   }
 

+ 76 - 15
tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference_formatter.inc

@@ -15,6 +15,11 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
   public function view(&$element, $entity_type, $entity, $langcode, $items, $display) {
     $content = '';
 
+    // Do we have an empty list? If so, just return.
+    if (!$items[0]['value']) {
+      return;
+    }
+
     $field_name = $this->field['field_name'];
     $field_type = $this->field['type'];
     $field_table = $this->instance['settings']['chado_table'];
@@ -26,27 +31,83 @@ class sbo__database_cross_reference_formatter extends ChadoFieldFormatter {
     $accession_term = chado_get_semweb_term('dbxref', 'accession');
     $dburl_term = chado_get_semweb_term('db', 'url');
 
+    // First, organize the values by their databases.
+    $ordered_items = [];
     foreach ($items as $delta => $item) {
-      if (!$item['value']) {
-        continue;
+      $db = $item['value'][$dbname_term];
+      $accession = $item['value'][$accession_term];
+      // It is possible that a database does not have a url, in which case no link can be generated.
+      if (array_key_exists($dburl_term, $item['value']) and $item['value'][$dburl_term]) {
+        $url = $item['value'][$dburl_term];
+
+        // This emulates chado_get_dbxref_url() but implemented inline here for better performance.
+        $db_count = 0;
+        $acc_count = 0;
+        $url = preg_replace('/\{db\}/', $db, $url, -1, $acc_count);
+        $url = preg_replace('/\{accession\}/', $accession, $url, -1, $acc_count);
+
+        $content = l($accession, $url, ['attributes' => ['target' => '_blank']]);
       }
-      $content = $item['value'][$dbname_term] . ':' . $item['value'][$accession_term];
-      if ($item['value'][$dburl_term]) {
-        $dbxref = chado_get_dbxref(['dbxref_id' => $item['chado-' . $linker_table . '__dbxref_id']]);
-        $url = chado_get_dbxref_url($dbxref);
-        $content = l($content, $url, ['attributes' => ['target' => '_blank']]);
+      else {
+        $content = $accession;
       }
-      $element[$delta] = [
-        '#type' => 'markup',
-        '#markup' => $content,
-      ];
+      $ordered_items[ucfirst($db)][] = $content;
     }
 
-    if (count($element) == 0) {
-      $element[0] = [
-        '#type' => 'markup',
-        '#markup' => 'There are no cross references.',
+    // Sort the groups by database name.
+    ksort($ordered_items);
+
+    // Generate the pagers for each type.
+    $list_items = [];
+    $headers = [];
+    $rows = [];
+    foreach ($ordered_items as $type => $children) {
+      $items_per_page = array_key_exists('items_per_page', $this->instance['settings']) ? $this->instance['settings']['items_per_page'] : 10;
+      $total_records = count($children);
+      $total_pages = (int) ($total_records / $items_per_page) + 1;
+      $pelement = 0;
+      $current_page = pager_default_initialize($total_records, $items_per_page, $pelement);
+      $pager = theme('pager', [
+        'tags' => [],
+        'element' => $pelement,
+        'parameters' => [],
+        'quantity' => 5,
+      ]);
+      $pager = $this->ajaxifyPager($pager, $entity);
+      $page_items = array_chunk($children, $items_per_page);
+
+      $rows[] = [
+        [
+          'data' => ucfirst($type),
+          'header' => TRUE,
+          'width' => '20%',
+        ],
+        theme_item_list([
+          'items' => $page_items[$current_page],
+          'title' => '',
+          'type' => 'ul',
+          'attributes' => [],
+        ]) . $pager,
       ];
     }
+
+    $table = [
+      'header' => [],
+      'rows' => $rows,
+      'attributes' => [
+        'id' => 'sbo__database_cross_reference',
+        'class' => 'tripal-data-table',
+      ],
+      'sticky' => FALSE,
+      'caption' => "",
+      'colgroups' => [],
+      'empty' => 'There are no cross references.',
+    ];
+    $content = theme_table($table);
+    $element[0] = [
+      '#type' => 'markup',
+      '#markup' => $content,
+    ];
+
   }
 }