Forráskód Böngészése

Added Search API integration including a default search view based on the search api default node index.

Lacey Sanderson 9 éve
szülő
commit
55f65cba1c

+ 104 - 0
tripal_core/api/tripal_core.chado_nodes.title_and_path.api.inc

@@ -1138,6 +1138,60 @@ function chado_node_get_token_format($application, $content_type, $options = arr
   }
 }
 
+/**
+ * Builds a readable (but not necessarily unique) token format for the table
+ * referenced by the last clause of a primary key token.
+ *
+ * For example, given the token [feature.type_id>cvterm.cvterm_id] you don't
+ * want the actual id indexed but instead would want the term name,
+ * [feature.type_id>cvterm.name].
+ *
+ * The final field of the token is swapped out as follows:
+ *  - organism: "[..genus] [..species] ([..common_name])"
+ *  - dbxref: "[..accession] ([..db_id>db.name])"
+ *  - any other table: a comma-separated list with one token for every field
+ *    whose name contains "name", as reported by chado_get_schema().
+ *
+ * @param $token
+ *   A single chado token including the surrounding square brackets.
+ *
+ * @return
+ *   The readable format string, or FALSE if no readable field could be
+ *   determined for the table (including when no schema is available for it).
+ */
+function chado_node_get_readable_format($token) {
+
+  // Strip the brackets and split into table clauses; only the last clause
+  // (ie: cvterm.cvterm_id in [feature.type_id>cvterm.cvterm_id]) matters here.
+  $clauses = explode('>', str_replace(array('[', ']'), '', $token));
+  $last_clause = end($clauses);
+
+  // A clause is normally table.field; fall back to ':' as the separator when
+  // the clause contains no period.
+  $clause_parts = explode('.', $last_clause);
+  if (sizeof($clause_parts) == 1) {
+    $clause_parts = explode(':', $last_clause);
+  }
+  $table = $clause_parts[0];
+
+  // Matches the final field name of the token so it can be replaced.
+  $last_field_pattern = '/(\w+)\]$/';
+
+  if ($table == 'organism') {
+    $genus = preg_replace($last_field_pattern, 'genus]', $token);
+    $species = preg_replace($last_field_pattern, 'species]', $token);
+    $common_name = preg_replace($last_field_pattern, 'common_name]', $token);
+    return $genus . ' ' . $species . ' (' . $common_name . ')';
+  }
+
+  if ($table == 'dbxref') {
+    $accession = preg_replace($last_field_pattern, 'accession]', $token);
+    $db_name = preg_replace($last_field_pattern, 'db_id>db.name]', $token);
+    return $accession . ' (' . $db_name . ')';
+  }
+
+  // Generic case: use every field with "name" in it. Bail out early if the
+  // schema could not be retrieved so we never iterate over a non-array.
+  $schema = chado_get_schema($table);
+  if (!is_array($schema) OR empty($schema['fields']) OR !is_array($schema['fields'])) {
+    return FALSE;
+  }
+
+  $name_tokens = array();
+  foreach ($schema['fields'] as $field_name => $details) {
+    if (preg_match('/name/', $field_name)) {
+      $name_tokens[] = preg_replace($last_field_pattern, $field_name . ']', $token);
+    }
+  }
+  if (empty($name_tokens)) {
+    return FALSE;
+  }
+
+  return implode(', ', $name_tokens);
+}
+
 /**
  * Generate the unique constraint for a given base table using the
  * Chado Schema API definition
@@ -1388,6 +1442,56 @@ function chado_node_format_tokens($tokens) {
   return theme_table($table);
 }
 
+/**
+ * Returns the "location" as specified in the token information based on the token.
+ *
+ * The location lists, in order, the keys that need to be travelled to reach
+ * the value: the base table, the base field and then the field of every
+ * following table clause, all separated by ' > '. For example, the token
+ * [feature.organism_id>organism.genus] has the location
+ * "feature > organism_id > genus".
+ *
+ * @param $token
+ *   Either a token information array containing a 'location' key or a single
+ *   chado token string such as [feature.organism_id>organism.genus].
+ *
+ * @return
+ *   The location string, or FALSE if it could not be determined (the token is
+ *   neither a string nor an array with a 'location', or its first clause is
+ *   not of the form table.field).
+ */
+function chado_node_get_location_from_token($token) {
+
+  // Token information arrays already carry their location.
+  if (is_array($token) and isset($token['location'])) {
+    return $token['location'];
+  }
+
+  // Anything that is not a string at this point cannot be parsed.
+  if (!is_string($token)) {
+    return FALSE;
+  }
+
+  // Remove the containing brackets, break into table clauses and then break
+  // each table clause into table & field (separated by '.' or, failing that, ':').
+  $clauses = explode('>', str_replace(array('[', ']'), '', $token));
+  foreach ($clauses as $k => $clause) {
+    $pieces = explode('.', $clause);
+    if (sizeof($pieces) == 1) {
+      $pieces = explode(':', $clause);
+    }
+    $clauses[$k] = $pieces;
+  }
+
+  // The first clause must be exactly table.field.
+  if (sizeof($clauses[0]) != 2) {
+    return FALSE;
+  }
+  $location = $clauses[0][0] . ' > ' . $clauses[0][1];
+
+  // Each foreign key clause contributes only its field to the location. A
+  // base level field simply has no further clauses.
+  foreach (array_slice($clauses, 1) as $pieces) {
+    if (isset($pieces[1])) {
+      $location .= ' > ' . $pieces[1];
+    }
+  }
+
+  return $location;
+}
+
 /**
  * This sorts tokens first by depth (ie: stock.* is before stock.*>subtable.*) and
  * then alphabetically within a level (ie: stock.name comes before stock.type_id)

+ 339 - 0
tripal_core/includes/tripal_core.search.inc

@@ -0,0 +1,339 @@
+<?php
+/**
+ * @file
+ * Adds support for Drupal indexing of Chado.
+ * Note that not all of Chado is indexed: only base table fields with "name" in
+ * their name, base table foreign keys and any fields listed by implementations
+ * of hook_search_include_chado_fields().
+ */
+
+/**
+ * Implements hook_search_include_chado_fields().
+ *
+ * Lets Tripal admin/modules list, in a simple manner, the chado fields that
+ * should be made available for search indexing.
+ *
+ * @return
+ *   An array of chado fields to make available for indexing. Each element is
+ *   the table name followed by the field name, separated by a period. For
+ *   example, feature.uniquename indicates the uniquename field of the feature
+ *   table.
+ */
+function tripal_core_search_include_chado_fields() {
+  $chado_fields = array();
+
+  $chado_fields[] = 'organism.genus';
+  $chado_fields[] = 'organism.species';
+
+  return $chado_fields;
+}
+
+/**
+ * Implements hook_entity_property_info_alter().
+ *
+ * This is where we actually add the properties to the node entity in order to
+ * indicate which chado fields should be indexed. For every chado node type
+ * (recognised by the 'chado_node_api' key in its hook_node_info() entry) this
+ * adds:
+ *  - a property for each base table field whose name contains "name" or which
+ *    was listed through hook_search_include_chado_fields(), unless a property
+ *    with that machine name already exists;
+ *  - a text property for each base table foreign key whose value is built
+ *    from a readable token format of the referenced table.
+ * Finally hook_tripal_search_properties_alter() is invoked so other modules
+ * can adjust the result.
+ *
+ * @param $info
+ *   The entity property information array, altered by reference.
+ */
+function tripal_core_entity_property_info_alter(&$info) {
+
+  // We provide a hook to allow Tripal admin to easily add fields to the search api.
+  // We want to invoke all implementations of that hook now for use below.
+  $fields_to_include = array_unique(module_invoke_all('search_include_chado_fields'));
+
+  // Retrieve information for all nodes.
+  // We focus on nodes at this point because we need to link search results back to
+  // the entity and we have no entities for non-node chado content in Tripal2.
+  $node_info = module_invoke_all('node_info');
+
+  foreach ($node_info as $n) {
+
+    // This hook is defined for ALL node types but we only care about chado
+    // node types, which are distinguished by the 'chado_node_api' key.
+    if (!isset($n['chado_node_api']['base_table'])) {
+      continue;
+    }
+    $base_table = $n['chado_node_api']['base_table'];
+    $bundle = $n['base'];
+
+    // Without a usable schema there is nothing we can describe for this type.
+    $schema = chado_get_schema($base_table);
+    if (!is_array($schema) OR empty($schema['fields']) OR !is_array($schema['fields'])) {
+      continue;
+    }
+
+    // Start by adding some defaults. It feels safe to say we probably want to
+    // index all the "names", so any field containing "name" is added
+    // automatically along with any fields previously indicated to be indexed.
+    foreach ($schema['fields'] as $field_name => $details) {
+
+      $machine_name = $base_table . '.' . $field_name;
+
+      if (!preg_match('/name/', $field_name) AND !in_array($machine_name, $fields_to_include)) {
+        continue;
+      }
+      // Never override a property that has already been defined.
+      if (isset($info['node']['bundles'][$bundle]['properties'][$machine_name])) {
+        continue;
+      }
+
+      $info['node']['bundles'][$bundle]['properties'][$machine_name] = array(
+        // Try to create a readable label.
+        'label' => ucwords(str_replace(array('.', '_'), ' ', $machine_name)),
+        // The description is optional in schema definitions.
+        'description' => isset($details['description']) ? $details['description'] : '',
+        'type' => ($details['type'] == 'varchar') ? 'text' : $details['type'],
+        'schema field' => '[' . $machine_name . ']',
+        // The following getter callback is a generic function that can retrieve
+        // values for any chado field.
+        'getter callback' => 'tripal_search_chado_token_getter_callback'
+      );
+    }
+
+    // We want to add any base foreign keys. This allows you to search for all
+    // features from a given organism. Foreign keys are optional in schema
+    // definitions so default to none.
+    $foreign_keys = array();
+    if (isset($schema['foreign keys']) AND is_array($schema['foreign keys'])) {
+      $foreign_keys = $schema['foreign keys'];
+    }
+    foreach ($foreign_keys as $table => $fk_details) {
+      if (empty($fk_details['columns']) OR !is_array($fk_details['columns'])) {
+        continue;
+      }
+      foreach ($fk_details['columns'] as $left_field => $right_field) {
+
+        $machine_name = $base_table . '.' . $left_field;
+        $description = '';
+        if (isset($schema['fields'][$left_field]['description'])) {
+          $description = $schema['fields'][$left_field]['description'];
+        }
+
+        // Try to create a readable label.
+        $label = $table . ' (' . $machine_name . ')';
+        if (preg_match('/(\w+)_id/', $left_field, $matches)) {
+          $label = str_replace('_', ' ', $base_table);
+          $label .= ' ' . str_replace('_', ' ', $matches[1]);
+          $label = ucwords($label);
+        }
+
+        // Prefer readable fields of the referenced table and fall back to its
+        // unique constraint when none could be determined.
+        $pretoken = '[' . $base_table . '.' . $left_field . '>' . $table . '.' . $right_field . ']';
+        $format = chado_node_get_readable_format($pretoken);
+        if (!$format) {
+          $format = chado_node_get_unique_constraint_format($table);
+        }
+
+        $info['node']['bundles'][$bundle]['properties'][$machine_name] = array(
+          'label' => $label,
+          'description' => $description,
+          'type' => 'text',
+          'schema field' => $format,
+          // The following getter callback is a generic function that can retrieve
+          // values for any chado foreign key.
+          'getter callback' => 'tripal_search_chado_token_getter_callback'
+        );
+      }
+    }
+  }
+
+  // Provide our own hook for altering properties to make it easier for our users.
+  drupal_alter('tripal_search_properties', $info);
+}
+
+/**
+ * Allows tripal admin to alter entity property information after it has been
+ * built by tripal_core_entity_property_info_alter(). This is currently being
+ * used to indicate chado fields to be indexed for search.
+ *
+ * NOTE: If you simply need to add a field to be indexed, use hook_search_include_chado_fields()
+ * which provides the much easier method of simply listing fields to include.
+ *
+ * This function is most useful if you want to change the way the value is retrieved
+ * (done by changing the 'getter callback') or add your own custom computed field.
+ *
+ * @param $info
+ *   The entity property information array, passed by reference. Chado
+ *   properties are stored under $info['node']['bundles'][BUNDLE]['properties'].
+ */
+function hook_tripal_search_properties_alter(&$info) { }
+
+/**
+ * Implements a getter callback for chado token formats.
+ *
+ * A chado token format is a string containing chado tokens.
+ *
+ * Chado tokens are expected to follow the format of tokens auto-generated using
+ *   chado_node_generate_tokens(). For example, [feature.uniquename] indicates you
+ *   should return the uniquename of a feature node and [feature.organism_id>organism.species]
+ *   indicates you should return the organism species of the feature node.
+ *
+ * The chado token format must be stored in the 'schema field' when defining the property in
+ *  hook_entity_property_info() in order for this getter to work.
+ *
+ * @param $data
+ *   Must be a chado node object.
+ * @param $options
+ *   Not used by this callback.
+ * @param $field_name
+ *   The name of the property being retrieved; only used in log messages and as
+ *   a fallback token name.
+ * @param $type
+ *   Not used by this callback.
+ * @param $info
+ *   The property information array; its 'schema field' holds the token format.
+ *
+ * @return
+ *   The format with every token replaced by its value. NULL is returned when
+ *   the 'schema field' is missing, the base table cannot be determined, the
+ *   node is not of the matching type or none of the tokens had a value. A
+ *   format without any recognizable tokens is returned unchanged.
+ */
+function tripal_search_chado_token_getter_callback($data, $options, $field_name, $type, $info) {
+
+  if (!isset($info['schema field'])) {
+    tripal_report_error(
+      'tripal_search',
+      TRIPAL_ERROR,
+      'Unable to get value for :field because the schema field was not set.',
+      array(':field' => $field_name)
+    );
+    return NULL;
+  }
+  $format = $info['schema field'];
+
+  // Determine which tokens were used in the format string. If there are none
+  // then the format is static text and is returned as is.
+  if (!preg_match_all('/\[[^]]+\]/', $format, $token_matches)) {
+    tripal_report_error(
+      'tripal_search',
+      TRIPAL_NOTICE,
+      'Returned static text for :field since there were no tokens of a recognized format in the supplied format: :format',
+      array(':field' => $field_name, ':format' => $format)
+    );
+    return $format;
+  }
+  $used_tokens = $token_matches[0];
+
+  // Determine our base table so we know if this is even the right node type.
+  if (!preg_match('/^\[(\w+)\.(\w+)/', $used_tokens[0], $matches)) {
+    tripal_report_error(
+      'tripal_search',
+      TRIPAL_ERROR,
+      'Unable to extract the base table from the first token (:token) for :field because it didn\'t match the expected format: [tablename.field...',
+      array(':field' => $field_name, ':token' => $used_tokens[0])
+    );
+    return NULL;
+  }
+  $base_table = $matches[1];
+  $base_field = $matches[2];
+
+  // For some weird reason nodes of all types are trying to get a value for fields
+  // that we defined as specific to a given node type (ie: bundle). As such we need
+  // this check here to ensure this field is actually for this node type.
+  if (!isset($data->{$base_table})) {
+    return NULL;
+  }
+
+  // Label and description are optional in property definitions.
+  $token_name = isset($info['label']) ? $info['label'] : $field_name;
+  $token_description = isset($info['description']) ? $info['description'] : '';
+
+  // Get the value of each token and sub it in to the format.
+  $tokens_with_value = 0;
+  foreach ($used_tokens as $token) {
+    $token_info = array(
+      'name' => $token_name,
+      'table' => $base_table,
+      'field' => $base_field,
+      'token' => $token,
+      'description' => $token_description,
+      'location' => chado_node_get_location_from_token($token),
+    );
+
+    $value = chado_get_token_value($token_info, $data);
+
+    // Only truly missing values count as empty; a value of 0 is real data and
+    // should still be indexed.
+    if ($value !== NULL AND $value !== FALSE AND $value !== '' AND $value !== array()) {
+      $tokens_with_value++;
+    }
+
+    $format = str_replace($token, $value, $format);
+  }
+
+  // If none of the tokens had values then this node doesn't have this field.
+  // As such we return null so the search api doesn't bother indexing an empty format.
+  if ($tokens_with_value == 0) {
+    return NULL;
+  }
+
+  // At this point the format should have all tokens replaced for values and is
+  // thus the value we want to index for this field!
+  return $format;
+}
+
+/**
+ * Implements hook_modules_enabled().
+ *
+ * This hook is called when ANY module is enabled. This allows us to update
+ * the search api "Default node index" when any Tripal module is enabled thus allowing us
+ * to catch new node types right after they're created.
+ *
+ * @param $modules
+ *   The list of modules that were enabled. It is not inspected here: the index
+ *   update is attempted regardless of which modules were enabled.
+ */
+function tripal_core_modules_enabled($modules) {
+  tripal_search_update_default_index();
+}
+
+/**
+ * The Search API provides a default node index which has a number of
+ * node-specific fields enabled by default. We want to ensure our
+ * chado fields are also enabled by default thus making for easier
+ * enabling of Tripal search.
+ *
+ * This function should be called whenever new nodes might have been
+ * added to ensure that their fields are added as well.
+ *
+ * The default node index is only modified while it is still disabled and has
+ * no server assigned. That way we ensure we don't override user changes!
+ *
+ * @return
+ *   FALSE if the Search API module is not enabled or the "Default node index"
+ *   could not be found or loaded. Nothing is returned otherwise.
+ */
+function tripal_search_update_default_index() {
+
+  // This runs every time any module is enabled, so the Search API (and with it
+  // the search_api_index table and search_api_* functions) may not be there.
+  if (!module_exists('search_api')) {
+    return FALSE;
+  }
+
+  // First we need the index object for the "Default node index".
+  $index_id = db_query('SELECT id FROM search_api_index WHERE machine_name=:name',
+    array(':name' => 'default_node_index'))->fetchField();
+  if (!$index_id) {
+    return FALSE;
+  }
+  $index = search_api_index_load($index_id);
+  if (!$index) {
+    return FALSE;
+  }
+
+  // We only want to update the index if it's both disabled and has no server indicated.
+  // That way we can be reasonably sure that it has been untouched by admin users.
+  if ($index->enabled OR $index->server != NULL) {
+    tripal_report_error(
+      'tripal_search',
+      TRIPAL_NOTICE,
+      'The Search API "Default Node Index" was not updated with Tripal Fields. If you would like to enable more Tripal/Chado fields to be indexed, edit the Field Listing for the "Default Node Index" now.'
+    );
+    return;
+  }
+
+  // Start from the options (including fields) already set on the search index.
+  $changes = array('options' => $index->options);
+
+  // We need information about all the fields available to nodes before we can
+  // go crazy enabling them... That information is stored as properties of nodes
+  // so we'll grab that.
+  $info = entity_get_property_info('node');
+  $bundles = (isset($info['bundles']) AND is_array($info['bundles'])) ? $info['bundles'] : array();
+
+  // Now we want to loop through each node type and add all the properties for the
+  // chado node types.
+  // Assumption #1: We are assuming that all chado node types are prefixed 'chado_'.
+  foreach ($bundles as $node_type => $details) {
+    if (!preg_match('/^chado_/', $node_type) OR empty($details['properties'])) {
+      continue;
+    }
+
+    // Now add each chado field to the index but only if it is not already added.
+    foreach ($details['properties'] as $field_name => $field_details) {
+      if (isset($changes['options']['fields'][$field_name])) {
+        continue;
+      }
+      $changes['options']['fields'][$field_name]['type'] = ($field_details['type'] == 'varchar') ? 'text' : $field_details['type'];
+
+      // Furthermore if this is a name then we want to add a boost to ensure it carries
+      // more weight in the search results.
+      if (preg_match('/name/', $field_name)) {
+        $changes['options']['fields'][$field_name]['boost'] = '3.0';
+      }
+    }
+  }
+
+  // We also want to enable highlighting to ensure an excerpt is generated since this
+  // will be used in the default search view distributed with Tripal.
+  if (!isset($index->options['processors']['search_api_highlighting'])) {
+    $changes['options']['processors']['search_api_highlighting'] = array(
+      'status' => 1,
+      'weight' => 35,
+      'settings' => array(
+        'prefix' => '<strong>',
+        'suffix' => '</strong>',
+        'excerpt' => 1,
+        'excerpt_length' => 256,
+        'exclude_fields' => array(),
+        'highlight' => 'always',
+      ),
+    );
+  }
+  else {
+    $changes['options']['processors']['search_api_highlighting']['status'] = 1;
+    $changes['options']['processors']['search_api_highlighting']['settings']['excerpt'] = 1;
+  }
+
+  // Finally we save all of our changes :-).
+  search_api_index_edit($index_id, $changes);
+  drupal_set_message('The Search API "Default Node Index" was updated.');
+}
+

+ 1 - 2
tripal_core/tripal_core.info

@@ -11,5 +11,4 @@ scripts[]          = theme/js/tripal.js
 
 dependencies[]     = views
 dependencies[]     = path
-dependencies[]     = search
-dependencies[]     = php
+dependencies[]     = php

+ 1 - 0
tripal_core/tripal_core.module

@@ -22,6 +22,7 @@ require_once 'includes/tripal_core.mviews.inc';
 require_once 'includes/tripal_core.custom_tables.inc';
 require_once 'includes/tripal_core.chado_install.inc';
 require_once 'includes/tripal_core.form_elements.inc';
+require_once 'includes/tripal_core.search.inc';
 
 // Set some global variables.
 tripal_core_set_globals();

+ 79 - 2
tripal_core/tripal_core.views_default.inc

@@ -21,6 +21,14 @@ function tripal_core_views_default_views() {
   $view = tripal_core_admin_defaultview_mviews();
   $views[$view->name] = $view;
 
+  // We only want to make this view available if the Search API module is installed
+  // and the "Default Node Index has been enabled.
+  $index_enabled = db_query('SELECT enabled FROM search_api_index WHERE machine_name=:name', array(':name' => 'default_node_index'))->fetchField();
+  if (module_exists('search_api') AND $index_enabled) {
+    $view = tripal_core_search_default_node_index();
+    $views[$view->name] = $view;
+  }
+
   return $views;
 }
 
@@ -317,7 +325,7 @@ function tripal_core_admin_defaultview_custom_tables() {
   $view->core = 7;
   $view->api_version = '3.0';
   $view->disabled = FALSE; /* Edit this to true to make a default view disabled initially */
-  
+
   /* Display: Master */
   $handler = $view->new_display('default', 'Master', 'default');
   $handler->display->display_options['title'] = 'Custom Tables';
@@ -416,7 +424,7 @@ function tripal_core_admin_defaultview_custom_tables() {
     1 => 0,
     3 => 0,
   );
-  
+
   /* Display: Page */
   $handler = $view->new_display('page', 'Page', 'page');
   $handler->display->display_options['path'] = 'admin/tripal/schema/custom_tables/tables';
@@ -698,3 +706,72 @@ function tripal_core_admin_defaultview_mviews() {
 
   return $view;
 }
+
+/**
+ * Provides a default search view for the Search API "Default Node Index".
+ *
+ * The view is named 'search_api_default_node_search', is based on the
+ * 'search_api_index_default_node_index' table and is disabled initially. It
+ * exposes a single fulltext "Keywords" filter (identifier 'keywords'), requires
+ * input before showing results, lists the rendered node teasers 10 per page
+ * sorted by descending relevance and provides a page display at 'search/node'.
+ *
+ * @return
+ *   The view object describing the default search view.
+ */
+function tripal_core_search_default_node_index() {
+
+  $view = new view();
+  $view->name = 'search_api_default_node_search';
+  $view->description = 'A default keyword-based search view using the search API "Default node index".';
+  $view->tag = 'search';
+  $view->base_table = 'search_api_index_default_node_index';
+  $view->human_name = 'Search Nodes';
+  $view->core = 7;
+  $view->api_version = '3.0';
+  $view->disabled = TRUE; /* Edit this to true to make a default view disabled initially */
+
+  /* Display: Master */
+  $handler = $view->new_display('default', 'Master', 'default');
+  $handler->display->display_options['title'] = 'Search Pages';
+  $handler->display->display_options['use_more_always'] = FALSE;
+  $handler->display->display_options['access']['type'] = 'none';
+  $handler->display->display_options['cache']['type'] = 'none';
+  $handler->display->display_options['query']['type'] = 'views_query';
+  $handler->display->display_options['exposed_form']['type'] = 'input_required';
+  $handler->display->display_options['exposed_form']['options']['submit_button'] = 'Search';
+  $handler->display->display_options['exposed_form']['options']['expose_sort_order'] = FALSE;
+  $handler->display->display_options['exposed_form']['options']['text_input_required'] = '';
+  $handler->display->display_options['exposed_form']['options']['text_input_required_format'] = 'filtered_html';
+  $handler->display->display_options['pager']['type'] = 'full';
+  $handler->display->display_options['pager']['options']['items_per_page'] = '10';
+  $handler->display->display_options['style_plugin'] = 'default';
+  $handler->display->display_options['row_plugin'] = 'fields';
+  /* Field: Content: Rendered Node */
+  $handler->display->display_options['fields']['rendered_entity']['id'] = 'rendered_entity';
+  $handler->display->display_options['fields']['rendered_entity']['table'] = 'views_entity_node';
+  $handler->display->display_options['fields']['rendered_entity']['field'] = 'rendered_entity';
+  $handler->display->display_options['fields']['rendered_entity']['label'] = '';
+  $handler->display->display_options['fields']['rendered_entity']['element_label_colon'] = FALSE;
+  $handler->display->display_options['fields']['rendered_entity']['link_to_entity'] = 1;
+  $handler->display->display_options['fields']['rendered_entity']['display'] = 'view';
+  $handler->display->display_options['fields']['rendered_entity']['view_mode'] = 'teaser';
+  $handler->display->display_options['fields']['rendered_entity']['bypass_access'] = 0;
+  /* Sort criterion: Search: Relevance */
+  $handler->display->display_options['sorts']['search_api_relevance']['id'] = 'search_api_relevance';
+  $handler->display->display_options['sorts']['search_api_relevance']['table'] = 'search_api_index_default_node_index';
+  $handler->display->display_options['sorts']['search_api_relevance']['field'] = 'search_api_relevance';
+  $handler->display->display_options['sorts']['search_api_relevance']['order'] = 'DESC';
+  /* Filter criterion: Search: Fulltext search */
+  $handler->display->display_options['filters']['search_api_views_fulltext']['id'] = 'search_api_views_fulltext';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['table'] = 'search_api_index_default_node_index';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['field'] = 'search_api_views_fulltext';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['operator'] = 'OR';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['exposed'] = TRUE;
+  $handler->display->display_options['filters']['search_api_views_fulltext']['expose']['operator_id'] = 'search_api_views_fulltext_op';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['expose']['label'] = 'Keywords';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['expose']['operator'] = 'search_api_views_fulltext_op';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['expose']['identifier'] = 'keywords';
+  $handler->display->display_options['filters']['search_api_views_fulltext']['expose']['remember_roles'] = array(
+    2 => '2',
+    1 => 0,
+    3 => 0,
+  );
+
+  /* Display: Page */
+  $handler = $view->new_display('page', 'Page', 'page');
+  $handler->display->display_options['path'] = 'search/node';
+
+  return $view;
+}