tripal_core.search.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. <?php
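/**
 * @file
 * Adds support for Drupal indexing of Chado.
 *
 * Note that not all of Chado is indexed. Only the following base-table fields
 * of chado node types are exposed for indexing: fields whose name contains
 * "name", foreign key fields, and any field listed by an implementation of
 * hook_search_include_chado_fields().
 */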
  8. /**
  9. * Implements hook_search_include_chado_fields().
  10. *
  11. * This hook allows Tripal Admin/modules to specify which chado fields should be indexed
  12. * for searching in a simple manner.
  13. *
  14. * @return
  15. * An array of chado fields you would like available for indexing. Each element should
  16. * be the name of the table followed by the field and separated by a period. For example.
  17. * feature.uniquename to indicate the uniquename field from the feature table.
  18. */
  19. function tripal_core_search_include_chado_fields() {
  20. return array(
  21. 'organism.genus',
  22. 'organism.species',
  23. );
  24. }
  25. /**
  26. * Implements hook_entity_property_info_alter().
  27. *
  28. * This is where we actually add the properties to the node entity in order to indicate
  29. * which chado fields should be indexed.
  30. */
  31. function tripal_core_entity_property_info_alter(&$info) {
  32. // We provide a hook to allow Tripal admin to easily add fields to the search api.
  33. // We want to invoke all implementations of that hook now for use below.
  34. $fields_to_include = module_invoke_all('search_include_chado_fields');
  35. $fields_to_include = array_unique($fields_to_include);
  36. // Retrieve information for all nodes.
  37. // We focus on nodes at this point because we need to link search results back to
  38. // the entity and we have no entites for non-node chado content in Tripal2.
  39. $node_info = module_invoke_all('node_info');
  40. foreach ($node_info as $n) {
  41. // Now keep in mind this hook is defined for ALL THE NODE TYPES and we only want
  42. // to add extra support for chado so we onle care about chado node types.
  43. // We can distinguish chado node types from all others by the existence of
  44. // the 'chado_node_api' key which is used for all sorts of beautiful tripal/chado
  45. // node integration (ie: adding properties, relationships and dbxrefs to node forms).
  46. if (isset($n['chado_node_api'])) {
  47. $schema = chado_get_schema($n['chado_node_api']['base_table']);
  48. // Now we are going to start by adding some defaults. It feels safe to say, we
  49. // probably want to index all the "names" so we are going to look through
  50. // all the fields and if they contain "name" we are going to add them automatically.
  51. foreach ($schema['fields'] as $field_name => $details) {
  52. $machine_name = $n['chado_node_api']['base_table'] . '.' . $field_name;
  53. // Try to create a readable label.
  54. $label = ucwords(str_replace(array('.','_'),' ',$machine_name));
  55. // We want to add all name fields and any fields previously indicated to be indexed.
  56. if (preg_match('/name/', $field_name) OR in_array($machine_name, $fields_to_include)) {
  57. if (!isset($info['node']['bundles'][ $n['base'] ]['properties'][$machine_name])) {
  58. $info['node']['bundles'][ $n['base'] ]['properties'][$machine_name] = array(
  59. 'label' => $label,
  60. 'description' => $details['description'],
  61. 'type' => ($details['type'] == 'varchar') ? 'text' : $details['type'],
  62. 'schema field' => '[' . $machine_name . ']',
  63. // The following getter callback is a generic function that can retrieve
  64. // values for any chado field.
  65. 'getter callback' => 'tripal_search_chado_token_getter_callback'
  66. );
  67. }
  68. }
  69. }
  70. // We want to add any base foreign keys. This allows you to search for all features
  71. // from a given organism.
  72. foreach ($schema['foreign keys'] as $table => $fk_details) {
  73. foreach ($fk_details['columns'] as $left_field => $right_field) {
  74. $machine_name = $n['chado_node_api']['base_table'] . '.' . $left_field;
  75. $field_details = $schema['fields'][$left_field];
  76. // Try to create a readable label.
  77. $label = $table . ' (' . $machine_name . ')';
  78. if (preg_match('/(\w+)_id/',$left_field,$matches)) {
  79. $label = str_replace('_', ' ', $n['chado_node_api']['base_table']);
  80. $label .= ' ' . str_replace('_', ' ', $matches[1]);
  81. $label = ucwords($label);
  82. }
  83. $pretoken = '[' . $n['chado_node_api']['base_table'] . '.' . $left_field . '>' . $table . '.' . $right_field . ']';
  84. $format = chado_node_get_readable_format($pretoken);
  85. if (!$format) $format = chado_node_get_unique_constraint_format($table);
  86. $info['node']['bundles'][ $n['base'] ]['properties'][$machine_name] = array(
  87. 'label' => $label,
  88. 'description' => $field_details['description'],
  89. 'type' => 'text',
  90. 'schema field' => $format,
  91. // The following getter callback is a generic function that can retrieve
  92. // values for any chado foreign key.
  93. 'getter callback' => 'tripal_search_chado_token_getter_callback'
  94. );
  95. }
  96. }
  97. }
  98. }
  99. // Provide our own hook for altering properties to make it easier for our users.
  100. drupal_alter('tripal_search_properties', $info);
  101. }
  102. /**
  103. * Allows tripal admin to alter entity property information after it has. This is currently
  104. * being used to indicate chado fields to be indexed for search.
  105. *
  106. * NOTE: If you simply need to add a field to be indexed, use hook_search_include_chado_fields()
  107. * which provides the much easier method of simply listing fields to include.
  108. *
  109. * This function is most useful if you want to change the way the value is retrieved
  110. * (done by changing the 'getter callback') or add your own custom computed field.
  111. */
  112. function hook_tripal_search_properties_alter(&$info) { }
  113. /**
  114. * Implements a getter callback for chado token formats.
  115. *
  116. * A chado token format is a string containing chado tokens.
  117. *
  118. * Chado tokens are expected to follow the format of tokens auto-generated using
  119. * chado_node_generate_tokens(). For example, [feature.uniquename] indicates you
  120. * should return the uniquename of a feature node and [feature.organism_id>organism.species]
  121. * indicates you should return the organism genus of the feature node.
  122. *
  123. * The chado token format must be stored in the 'schema field' when defining the property in
  124. * hook_entity_property_info() in order for this getter to work.
  125. *
  126. * @param $data
  127. * Must be a chado node object.
  128. */
  129. function tripal_search_chado_token_getter_callback($data, $options, $field_name, $type, $info) {
  130. if (isset($info['schema field'])) {
  131. $format = $info['schema field'];
  132. // Determine which tokens were used in the format string
  133. if (preg_match_all('/\[[^]]+\]/', $format, $used_tokens)) {
  134. $used_tokens = $used_tokens[0];
  135. // If there are no tokens then return the format as is...
  136. if (empty($used_tokens)) {
  137. tripal_report_error(
  138. 'tripal_search',
  139. TRIPAL_NOTICE,
  140. 'Returned static text for :field since there were no tokens in the supplied format: :format',
  141. array(':field' => $field_name, ':format' => $format)
  142. );
  143. return $format;
  144. }
  145. // Determine our base table so we know if this is even the right node type.
  146. if (preg_match('/^\[(\w+)\.(\w+)/',$used_tokens[0], $matches)) {
  147. $base_table = $matches[1];
  148. $field_name = $matches[2];
  149. // For some weird reason nodes of all types are trying to get a value for fields
  150. // that we defined as specific to a given node type (ie: bundle). As such we need
  151. // this check here to ensure this field is actually for this node type.
  152. if (!isset($data->{$base_table})) return NULL;
  153. // Get the value of each token.
  154. $null_tokens = array();
  155. foreach ($used_tokens as $token) {
  156. $token_info = array(
  157. 'name' => $info['label'],
  158. 'table' => $base_table,
  159. 'field' => $field_name,
  160. 'token' => $token,
  161. 'description' => $info['description'],
  162. 'location' => chado_node_get_location_from_token($token),
  163. );
  164. $value = chado_get_token_value($token_info, $data);
  165. if (empty($value)) $null_tokens[] = $token;
  166. // And sub it in to the format.
  167. $format = str_replace($token, $value, $format);
  168. }
  169. // If none of the tokens had values then this node doesn't have this field.
  170. // As such we return null so the search api doesn't bother indexing an empty format.
  171. if (sizeof($used_tokens) == sizeof($null_tokens)) return NULL;
  172. // At this poin the format should have all tokens replaced for values and is
  173. // thus the value we want to index for this field!
  174. return $format;
  175. }
  176. else {
  177. // Not able to determine table?
  178. tripal_report_error(
  179. 'tripal_search',
  180. TRIPAL_ERROR,
  181. 'Unable to extract the base table from the first token (:token) for :field because it didn\'t match the expected format: [tablename.field...',
  182. array(':field' => $field_name, ':token' => $used_tokens[0])
  183. );
  184. return NULL;
  185. }
  186. }
  187. // There were no tokens?
  188. else {
  189. tripal_report_error(
  190. 'tripal_search',
  191. TRIPAL_NOTICE,
  192. 'Returned static text for :field since there were no tokens of a recognized format in the supplied format: :format',
  193. array(':field' => $field_name, ':format' => $format)
  194. );
  195. return $format;
  196. }
  197. }
  198. else {
  199. tripal_report_error(
  200. 'tripal_search',
  201. TRIPAL_ERROR,
  202. 'Unable to get value for :field because the schema field was not set.',
  203. array(':field' => $field_name)
  204. );
  205. return NULL;
  206. }
  207. }
  208. /**
  209. * Implements hook_modules_enabled().
  210. *
  211. * This hook is called when ANY module is enabled. This allows us to update the
  212. * the search api "Default node index" when any Tripal module is enabled thus allowing us
  213. * to catch new node types right after they're created.
  214. */
  215. function tripal_core_modules_enabled($modules) {
  216. tripal_search_update_default_index();
  217. }
  218. /**
  219. * The Search API provides a default node index which has a number of
  220. * node-specific fields enabled by default. We want to ensure our
  221. * chado fields are also enabled by default thus making for easier
  222. * enabling of Tripal search.
  223. *
  224. * This function should be called whenever new nodes might have been
  225. * added to ensure that their fields are added as well.
  226. *
  227. * We should only modify the default node index if it has no database service yet.
  228. * That way we ensure we don't override user changes!
  229. */
  230. function tripal_search_update_default_index() {
  231. // First we need the index object for the "Default node index".
  232. $index_id = db_query('SELECT id FROM search_api_index WHERE machine_name=:name',
  233. array(':name' => 'default_node_index'))->fetchField();
  234. if (!$index_id) {
  235. // ERROR
  236. return FALSE;
  237. }
  238. $index = search_api_index_load($index_id);
  239. // Collect all the fields already added to the search index.
  240. $changes = array('options' => $index->options);
  241. // Now we only want to update the index if it's both enabled and has no server indicated.
  242. // That way we can be reasonably sure that it was been untouched by admin users.
  243. if ($index->enabled == FALSE AND $index->server == NULL) {
  244. // We need information about all the fields available to nodes before we can
  245. // go crazy enabling them... That information is stored as properties of nodes
  246. // so we'll grab that.
  247. $info = entity_get_property_info('node');
  248. // Now we want to loop through each node type and add all the properties for the
  249. // chado node types.
  250. // Assumption #1: We are assuming that all chado node types are prefixed 'chado_'.
  251. foreach ($info['bundles'] as $node_type => $details) {
  252. if (preg_match('/^chado_/', $node_type)) {
  253. // Now add each chado fields to the index but only if they are not already added.
  254. foreach ($details['properties'] as $field_name => $field_details) {
  255. if (!isset($changes['options']['fields'][$field_name])) {
  256. $changes['options']['fields'][$field_name]['type'] = ($field_details['type'] == 'varchar') ? 'text' : $field_details['type'];
  257. // Furthermore if this is a name then we want to add a boost to ensure it carries
  258. // more weight in the search results.
  259. if (preg_match('/name/',$field_name)) {
  260. $changes['options']['fields'][$field_name]['boost'] = '3.0';
  261. }
  262. }
  263. }
  264. }
  265. }
  266. // We also want to enable highlighting to ensure an excerpt is generated since this
  267. // will be used in the default search view distributed with Tripal.
  268. if (!isset($index->options['processors']['search_api_highlighting'])) {
  269. $changes['options']['processors']['search_api_highlighting'] = array(
  270. 'status' => 1,
  271. 'weight' => 35,
  272. 'settings' => array(
  273. 'prefix' => '<strong>',
  274. 'suffix' => '</strong>',
  275. 'excerpt' => 1,
  276. 'excerpt_length' => 256,
  277. 'exclude_fields' => array(),
  278. 'highlight' => 'always',
  279. ),
  280. );
  281. }
  282. else {
  283. $changes['options']['processors']['search_api_highlighting']['status'] = 1;
  284. $changes['options']['processors']['search_api_highlighting']['settings']['excerpt'] = 1;
  285. }
  286. // Finally we save all of our changes :-).
  287. search_api_index_edit($index_id, $changes);
  288. drupal_set_message('The Search API "Default Node Index" was updated.');
  289. }
  290. else {
  291. tripal_report_error(
  292. 'tripal_search',
  293. TRIPAL_NOTICE,
  294. 'The Search API "Default Node Index" was not updated with Tripal Fields. If you would like to enable more Tripal/Chado fields to be indexed, edit the Field Listing for the "Default Node Index" now.'
  295. );
  296. }
  297. }