Browse Source

Working on speeding remote queries

Stephen Ficklin 7 years ago
parent
commit
29726b1d3e

+ 31 - 24
tripal/includes/TripalEntityCollection.inc

@@ -515,48 +515,55 @@ class TripalEntityCollection {
       $job->setTotalItems($total_entities);
       $job->setTotalItems($total_entities);
     }
     }
 
 
-    // Next load the entities and write them to the files.
+    // Iterate through the bundles in this collection and get the entities.
     foreach ($this->bundles as $bundle) {
     foreach ($this->bundles as $bundle) {
       $bundle_name = $bundle->bundle_name;
       $bundle_name = $bundle->bundle_name;
       $site_id = $bundle->site_id;
       $site_id = $bundle->site_id;
-      $entity_ids  = $this->getEntityIDs($bundle_name);
-      $field_ids = $this->getFieldIDs($bundle_name);
+      $entity_ids  = array_unique($this->getEntityIDs($bundle_name));
+      $field_ids = array_unique($this->getFieldIDs($bundle_name));
 
 
       // Clear any cached @context or API docs.
       // Clear any cached @context or API docs.
       if ($site_id and module_exists('tripal_ws')) {
       if ($site_id and module_exists('tripal_ws')) {
         tripal_clear_remote_cache($site_id);
         tripal_clear_remote_cache($site_id);
       }
       }
 
 
-      foreach ($entity_ids as $entity_id) {
-        $num_handled++;
-
+      // We want to load entities in batches to speed up performance.
+      $num_eids = count($entity_ids);
+      $bundle_eids_handled = 0;
+      $slice_size = 100;
+      while ($bundle_eids_handled < $num_eids) {
+        // Get a batch of $slice_size elements from the entities array.
+        $slice = array_slice($entity_ids, $bundle_eids_handled, $slice_size);
         if ($job) {
         if ($job) {
-          $job->setItemsHandled($num_handled);
+          $job->logMessage('Getting entities for ids !start to !end of !total',
+              array('!start' => $bundle_eids_handled,
+                    '!end' => $bundle_eids_handled + count($slice),
+                    '!total' => $num_eids));
         }
         }
+        $bundle_eids_handled += count($slice);
 
 
-        // if we have a site_id then we need to get the entity from the
-        // remote service. Otherwise create the entity from the local system.
-        $entity = NULL;
+        // If the bundle is from a remote site then call the appropriate
+        // function, otherwise, call the local function.
         if ($site_id and module_exists('tripal_ws')) {
         if ($site_id and module_exists('tripal_ws')) {
-          $entity = tripal_load_remote_entity($entity_id, $site_id, $bundle_name, $field_ids);
-          if (!$entity) {
-            continue;
-          }
+          $entities = tripal_load_remote_entities($slice, $site_id, $bundle_name, $field_ids);
         }
         }
         else {
         else {
-          $result = tripal_load_entity('TripalEntity', array($entity_id), FALSE, $field_ids, FALSE);
-          $entity = $result[$entity_id];
+          $entities = tripal_load_entity('TripalEntity', $slice, FALSE, $field_ids, FALSE);
         }
         }
+        $job->logMessage('Got !count entities.', array('!count' => count($entities)));
 
 
-        if (!$entity) {
-          continue;
-        }
+        // Now write each entity one at a time to the files.
+        foreach ($entities as $entity_id => $entity) {
 
 
-        // Write the same entity to all the formatters that are supported.
-        foreach ($formatters as $class => $formatter) {
-          //if ($class == 'TripalTabDownloader') {
-            $formatter->writeEntity($entity, $job);
-          //}
+          // Write the same entity to all the formatters that are supported.
+          foreach ($formatters as $class => $formatter) {
+            //if ($class == 'TripalTabDownloader') {
+              $formatter->writeEntity($entity, $job);
+            //}
+          }
+        }
+        if ($job) {
+          $job->setItemsHandled($num_handled + count($slice));
         }
         }
       }
       }
     }
     }

+ 19 - 17
tripal/includes/tripal.collections.inc

@@ -100,18 +100,22 @@ function tripal_user_collections_view_page($uid, $collection_id) {
   drupal_set_title('Data Collection: ' . $collection->getName());
   drupal_set_title('Data Collection: ' . $collection->getName());
 
 
   // Get the content types for this data collection.
   // Get the content types for this data collection.
-  $bundles = $collection->getBundles();
+  $cbundles = $collection->getBundles();
   $content_types = array();
   $content_types = array();
   $field_list = array();
   $field_list = array();
   $formatters_list = array();
   $formatters_list = array();
-  $entities_list = array();
-  foreach ($bundles as $bundle) {
-    $bundle = tripal_load_bundle_entity(array('name' => $bundle->bundle_name));
-    $content_types[] = $bundle->label;
-
-    $fields = $collection->getFields($bundle->name);
-    foreach ($fields as $index => $flist) {
-      foreach ($flist as $field_id) {
+  $num_entities = 0;
+
+  foreach ($cbundles as $cbundle) {
+    $eids = $collection->getEntityIDs($cbundle->bundle_name);
+    $fields = $collection->getFieldIDs($cbundle->bundle_name);
+
+    // Convert local field IDs to their names.
+    if (!$cbundle->site_id) {
+      $bundle = tripal_load_bundle_entity(array('name' => $cbundle->bundle_name));
+      $content_types[] = $bundle->label;
+
+      foreach ($fields as $field_id) {
         $field = field_info_field_by_id($field_id);
         $field = field_info_field_by_id($field_id);
         $instance = field_info_instance('TripalEntity', $field['field_name'], $bundle->name);
         $instance = field_info_instance('TripalEntity', $field['field_name'], $bundle->name);
         $field_list[] = $instance['label'];
         $field_list[] = $instance['label'];
@@ -119,17 +123,15 @@ function tripal_user_collections_view_page($uid, $collection_id) {
         $field_formatters = tripal_get_field_field_formatters($field, $instance);
         $field_formatters = tripal_get_field_field_formatters($field, $instance);
         foreach ($field_formatters as $class_name => $label) {
         foreach ($field_formatters as $class_name => $label) {
           tripal_load_include_downloader_class($class_name);
           tripal_load_include_downloader_class($class_name);
-          $formatters_list[] = $label . ' (' . $class_name::$full_label . ')';
+          $formatters_list[] = $class_name::$label . ' (' . $class_name::$full_label . ')';
         }
         }
       }
       }
     }
     }
+    // Convert remote field IDs to their names.
+    // TODO: add in retrieval of remote details.
 
 
-    $eids = $collection->getEntityIDs($bundle->name);
-    foreach ($eids as $index => $elist) {
-      foreach ($elist as $eid) {
-        $entities_list[] = $eid;
-      }
-    }
+
+    $num_entities += count($eids);
   }
   }
 
 
 
 
@@ -156,7 +158,7 @@ function tripal_user_collections_view_page($uid, $collection_id) {
     '#type' => 'item',
     '#type' => 'item',
     '#title' => 'Records',
     '#title' => 'Records',
     '#description' => t('The number of records contained in this data collection.'),
     '#description' => t('The number of records contained in this data collection.'),
-    '#markup' => count($entities_list),
+    '#markup' => number_format($num_entities),
   );
   );
 
 
   $contents['peek'] = array(
   $contents['peek'] = array(

+ 131 - 30
tripal_ws/api/tripal_ws.api.inc

@@ -186,44 +186,54 @@ function tripal_remove_site($record_id) {
 }
 }
 
 
 /**
 /**
- * Makes a request to the "content"service of a remote Tripal web site.
+ * Makes a request to the "content" service of a remote Tripal web site.
  *
  *
  * @param $site_id
  * @param $site_id
  *   The numeric site ID for the remote Tripal site.
  *   The numeric site ID for the remote Tripal site.
- *
+ * @param $path
+ *   The web service path for the content (excluding
+ *   'web-services/content/vX.x').  To retrieve the full content listing
+ *   leave this parameter empty.
  * @param $query
  * @param $query
- *   The query string. This string is added to the URL for the remote
- *   website.
+ *   A query string to appear after the ? in a URL.
  *
  *
  * @return
  * @return
  *   The JSON response formatted in a PHP array or FALSE if a problem occurred.
  *   The JSON response formatted in a PHP array or FALSE if a problem occurred.
  */
  */
-function tripal_query_remote_site($site_id, $query) {
-  $ctype = $query;
-  $qdata = '';
-  if (preg_match('/\?/', $query)) {
-    list($ctype, $qdata) = explode('?', $query);
+function tripal_get_remote_content($site_id, $path = '', $query = '') {
+
+  if (!$site_id) {
+    throw new Exception('Please provide a numeric site ID for the tripal_get_remote_content function.');
   }
   }
 
 
-  if ($site_id) {
-    $remote_site = db_select('tripal_sites', 'ts')
-      ->fields('ts')
-      ->condition('ts.id', $site_id)
-      ->execute()
-      ->fetchObject();
+  // Fetch the record for this site id.
+  $remote_site = db_select('tripal_sites', 'ts')
+    ->fields('ts')
+    ->condition('ts.id', $site_id)
+    ->execute()
+    ->fetchObject();
+
+  if (!$remote_site) {
+    tripal_report_error('tripal_ws', TRIPAL_ERROR,
+      t('Could not find a remote tripal site using the id provided: !id.',
+        array('!id' => $site_id)));
+    return FALSE;
   }
   }
 
 
   // Build the URL to the remote web services.
   // Build the URL to the remote web services.
   $ws_version = $remote_site->version;
   $ws_version = $remote_site->version;
   $ws_url = $remote_site->url;
   $ws_url = $remote_site->url;
   $ws_url = trim($ws_url, '/');
   $ws_url = trim($ws_url, '/');
-  $ws_url .= '/web-services/content/' . $ws_version . '/' . $ctype;
+  $ws_url .= '/web-services/content/' . $ws_version . '/' . $path;
+
   // Build the query and make any substitutions needed.
   // Build the query and make any substitutions needed.
-  //dpm($ws_url . '?' . $query);
-  $options = array(
-    'data' => $qdata,
-  );
-  $data = drupal_http_request($ws_url, $options);
+  if ($query) {
+    $ws_url = $ws_url . '?' . $query;
+  }
+
+  // TODO: something is wrong here, the query is not being recognized on
+  // the remote Tripal site. It's just returning the default.
+  $data = drupal_http_request($ws_url);
 
 
   if (!$data) {
   if (!$data) {
     tripal_report_error('tripal_ws', TRIPAL_ERROR,
     tripal_report_error('tripal_ws', TRIPAL_ERROR,
@@ -416,9 +426,12 @@ function tripal_get_remote_API_doc($site_id) {
 }
 }
 
 
 /**
 /**
- * Build and return a "fake" entity for a record from a remote Tripal site.
+ * Queries a remote site for an array of bulk entity ids.
  *
  *
- * @param $remote_entity_id
+ * This function returns an array of "fake" entities containing values
+ * for fields specified.
+ *
+ * @param $remote_entity_ids
  *   Array of the remote ids.
  *   Array of the remote ids.
  * @param $site_id
  * @param $site_id
  *   The numeric site ID for the remote Tripal site.
  *   The numeric site ID for the remote Tripal site.
@@ -430,6 +443,96 @@ function tripal_get_remote_API_doc($site_id) {
  *   on the remote content type.  Any remote fields that matches these IDs will
  *   on the remote content type.  Any remote fields that matches these IDs will
  *   be added to the entity returned.
  *   be added to the entity returned.
  * @return
  * @return
+ *    An array of fake entity objects where the key is the entity_id and
+ *    the value is the object.
+ */
+function tripal_load_remote_entities($remote_entity_ids, $site_id, $bundle_accession, $field_ids) {
+
+  // Validate every argument up front so that a malformed request is never
+  // sent to the remote site.
+  if (!$remote_entity_ids) {
+    throw new Exception('Please provide the list of remote entity ids for the tripal_load_remote_entities function.');
+  }
+  if (!is_array($remote_entity_ids)) {
+    throw new Exception('Please provide an array for the remote entity ids for the tripal_load_remote_entities function.');
+  }
+  if (!$site_id) {
+    throw new Exception('Please provide a numeric site ID for the tripal_load_remote_entities function.');
+  }
+  if (!$bundle_accession) {
+    throw new Exception('Please provide the bundle accession for the tripal_load_remote_entities function.');
+  }
+  if (!$field_ids) {
+    throw new Exception('Please provide the list of field IDs for the tripal_load_remote_entities function.');
+  }
+  if (!is_array($field_ids)) {
+    throw new Exception('Please provide an array for the field IDs for the tripal_load_remote_entities function.');
+  }
+
+  // Get the site documentation (loads from cache if already retrieved).
+  // The return value is not needed here; the call is kept so the lookup
+  // happens in the same place as before (presumably it primes the cache
+  // that the field lookups below rely on -- TODO confirm).
+  tripal_get_remote_API_doc($site_id);
+
+  // Build the query string: one page large enough to hold every requested
+  // id, restricted to the requested fields.
+  $query = 'page=1&limit=' . count($remote_entity_ids) .
+    '&ids=' . urlencode(implode(",", $remote_entity_ids)) .
+    '&fields=' . urlencode(implode(",", $field_ids));
+
+  $results = tripal_get_remote_content($site_id, $bundle_accession, $query);
+  if (!$results) {
+    return FALSE;
+  }
+
+  // Get the context JSON for this response. It is passed to
+  // _tripal_update_remote_entity_field() below together with each member.
+  $context_url = isset($results['@context']) ? $results['@context'] : NULL;
+  $context = tripal_get_remote_content_context($site_id, $context_url, $bundle_accession);
+  if (!$context) {
+    // No entity has been built at this point, so report the failure the same
+    // way as a failed content request rather than returning an undefined
+    // variable.
+    return FALSE;
+  }
+
+  // A response without a usable member list yields no entities.
+  $members = array();
+  if (isset($results['member']) and is_array($results['member'])) {
+    $members = $results['member'];
+  }
+
+  $entities = array();
+  foreach ($members as $member) {
+    // Without an '@id' there is nothing to key the entity on.
+    if (!isset($member['@id'])) {
+      continue;
+    }
+
+    // Start building the fake entity. The entity id is the run of digits at
+    // the end of the member's '@id' value.
+    $entity_id = preg_replace('/^.*?(\d+)$/', '$1', $member['@id']);
+    $entity = new stdClass();
+    $entity->entityType = 'TripalEntity';
+    $entity->entityInfo = [];
+    $entity->id = $entity_id;
+    $entity->type = 'TripalEntity';
+    $entity->bundle = $bundle_accession;
+    $entity->site_id = $site_id;
+
+    $member = _tripal_update_remote_entity_field($member, $context, 1);
+    foreach ($member as $field_id => $value) {
+      $field = tripal_get_remote_field_info($site_id, $bundle_accession, $field_id);
+      // Skip keys that do not resolve to a named field; an empty property
+      // name cannot be assigned on the entity object.
+      if (empty($field['field_name'])) {
+        continue;
+      }
+      $field_name = $field['field_name'];
+      $entity->{$field_name}['und'][0]['value'] = $value;
+    }
+
+    $entities[$entity_id] = $entity;
+  }
+  return $entities;
+}
+
+/**
+ * Queries a remote site for an entity.
+ *
+ * This function returns a "fake" entity containing values for all fields
+ * specified.
+ *
+ * @param $remote_entity_id
+ *   A remote entity ID.
+ * @param $site_id
+ *   The numeric site ID for the remote Tripal site.
+ * @param $bundle_accession
+ *   The controlled vocabulary term accession for the content type
+ *   on the remote Tripal site.
+ * @param $field_ids
+ *   The controlled vocabulary term accessions for the fields available
+ *   on the remote content type.  Any remote fields that matches these IDs will
+ *   be added to the entity returned.
+ * @return
  *    A fake entity object.
  *    A fake entity object.
  */
  */
 function tripal_load_remote_entity($remote_entity_id, $site_id, $bundle_accession, $field_ids) {
 function tripal_load_remote_entity($remote_entity_id, $site_id, $bundle_accession, $field_ids) {
@@ -438,13 +541,12 @@ function tripal_load_remote_entity($remote_entity_id, $site_id, $bundle_accessio
   $site_doc = tripal_get_remote_API_doc($site_id);
   $site_doc = tripal_get_remote_API_doc($site_id);
 
 
   // Get the remote entity and create the fake entity.
   // Get the remote entity and create the fake entity.
-  $query = $bundle_accession . '/' . $remote_entity_id;
-  $remote_entity = tripal_query_remote_site($site_id, $query);
+  $remote_entity = tripal_get_remote_content($site_id, $bundle_accession . '/' . $remote_entity_id);
   if (!$remote_entity) {
   if (!$remote_entity) {
     return FALSE;
     return FALSE;
   }
   }
 
 
-  // Start building the fake id.
+  // Start building the fake entity.
   $entity = new stdClass();
   $entity = new stdClass();
   $entity->entityType = 'TripalEntity';
   $entity->entityType = 'TripalEntity';
   $entity->entityInfo = [];
   $entity->entityInfo = [];
@@ -453,7 +555,6 @@ function tripal_load_remote_entity($remote_entity_id, $site_id, $bundle_accessio
   $entity->bundle = $bundle_accession;
   $entity->bundle = $bundle_accession;
   $entity->site_id = $site_id;
   $entity->site_id = $site_id;
 
 
-
   // Get the context JSON for this remote entity, we'll use it to map
   // Get the context JSON for this remote entity, we'll use it to map
   $context_url = $remote_entity['@context'];
   $context_url = $remote_entity['@context'];
   $context = tripal_get_remote_content_context($site_id, $context_url, $bundle_accession);
   $context = tripal_get_remote_content_context($site_id, $context_url, $bundle_accession);
@@ -678,7 +779,7 @@ function tripal_get_remote_field_instance_info($site_id, $bundle_accession, $fie
 
 
 
 
 /**
 /**
- * Retreive the bundle (content type) information from a remote Tripal site.
+ * Retrieve the content type information from a remote Tripal site.
  *
  *
  * The array returned is equivalent to the Hydra Vocabulary "supportedClass"
  * The array returned is equivalent to the Hydra Vocabulary "supportedClass"
  * stanza.
  * stanza.
@@ -692,7 +793,7 @@ function tripal_get_remote_field_instance_info($site_id, $bundle_accession, $fie
  *   A PHP array corresponding to the Hydra Class stanza (i.e. a content type).
  *   A PHP array corresponding to the Hydra Class stanza (i.e. a content type).
  *   Returns NULL if the class ID cannot be found.
  *   Returns NULL if the class ID cannot be found.
  */
  */
-function tripal_get_remote_bundle_doc($site_id, $bundle_accession){
+function tripal_get_remote_content_doc($site_id, $bundle_accession){
 
 
   // Get the site documentation (loads from cache if already retrieved).
   // Get the site documentation (loads from cache if already retrieved).
   $site_doc = tripal_get_remote_API_doc($site_id);
   $site_doc = tripal_get_remote_API_doc($site_id);
@@ -732,7 +833,7 @@ function tripal_get_remote_field_doc($site_id, $bundle_accession, $field_accessi
   // Get the site documentation (loads from cache if already retrieved).
   // Get the site documentation (loads from cache if already retrieved).
   $site_doc = tripal_get_remote_API_doc($site_id);
   $site_doc = tripal_get_remote_API_doc($site_id);
 
 
-  $class = tripal_get_remote_bundle_doc($site_id, $bundle_accession);
+  $class = tripal_get_remote_content_doc($site_id, $bundle_accession);
   $properties = $class['supportedProperty'];
   $properties = $class['supportedProperty'];
   foreach ($properties as $item) {
   foreach ($properties as $item) {
     if ($item['property'] == $field_accession) {
     if ($item['property'] == $field_accession) {

+ 1 - 1
tripal_ws/includes/TripalFields/remote__data/remote__data.inc

@@ -205,7 +205,7 @@ class remote__data extends WebServicesField {
        list($ctype, $qdata) = explode('?', $query);
        list($ctype, $qdata) = explode('?', $query);
      }
      }
 
 
-     $data = tripal_query_remote_site($this->remote_site->site_id, $query);
+     $data = tripal_get_remote_content($this->remote_site->site_id, $query);
 
 
      return $data;
      return $data;
    }
    }