tripal_core.search.inc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. <?php
  2. /**
  3. * @file
  4. * Adds support for Drupal indexing of Chado.
   * It's important to note that not all of Chado is indexed; only the fields
   * indicated by hook_search_include_chado_fields() are.
  7. */
  /**
   * Implements hook_search_include_chado_fields().
   *
   * Through this hook Tripal admin/modules list, in a simple manner, the chado
   * fields that should be made available for search indexing.
   *
   * @return
   *   A list of chado fields to make available for indexing. Every entry is the
   *   table name and the field name joined by a period; for example,
   *   feature.uniquename is the uniquename field of the feature table.
   */
  function tripal_core_search_include_chado_fields() {
    // tripal_core itself only contributes the two organism naming columns.
    $indexed_fields = [];
    foreach (['genus', 'species'] as $column) {
      $indexed_fields[] = 'organism.' . $column;
    }
    return $indexed_fields;
  }
  /**
   * Implements hook_entity_property_info_alter().
   *
   * Adds properties to the node entity describing which chado fields should be
   * indexed. For every chado node type (a hook_node_info() entry carrying a
   * 'chado_node_api' key) this adds:
   *  - a bundle property for each base table field whose name contains "name"
   *    or that was listed through hook_search_include_chado_fields();
   *  - for each foreign key column of the base table, a "<table>.<field> key"
   *    property on the node entity itself (shared by all node types) and a
   *    "<base table>.<field> expanded" bundle property.
   *
   * Finally, hook_tripal_search_properties_alter() implementations are given
   * the chance to adjust the result.
   *
   * @param $info
   *   The entity property info array, altered by reference.
   */
  function tripal_core_entity_property_info_alter(&$info) {
    // We provide a hook that lets Tripal admin easily add fields to the search
    // api; gather every implementation's answer once, without duplicates.
    $fields_to_include = array_unique(module_invoke_all('search_include_chado_fields'));

    // We focus on nodes because search results must link back to an entity and
    // there are no entities for non-node chado content in Tripal2.
    $node_info = module_invoke_all('node_info');
    foreach ($node_info as $n) {
      // hook_node_info() describes ALL node types. Chado node types are told
      // apart from the others by the existence of the 'chado_node_api' key.
      if (!isset($n['chado_node_api'])) {
        continue;
      }

      $bundle = $n['base'];
      $base_table = $n['chado_node_api']['base_table'];
      $schema = chado_get_schema($base_table);

      // A schema lacking fields or foreign keys must not break the loops below.
      $fields = (isset($schema['fields']) && is_array($schema['fields'])) ? $schema['fields'] : [];
      $foreign_keys = (isset($schema['foreign keys']) && is_array($schema['foreign keys'])) ? $schema['foreign keys'] : [];

      // Defaults: it feels safe to index every field containing "name", plus
      // any field explicitly requested through the hook above.
      foreach ($fields as $field_name => $details) {
        $machine_name = $base_table . '.' . $field_name;

        $wanted = preg_match('/name/', $field_name) || in_array($machine_name, $fields_to_include, TRUE);
        if (!$wanted) {
          continue;
        }
        // Never clobber a property somebody else already defined.
        if (isset($info['node']['bundles'][$bundle]['properties'][$machine_name])) {
          continue;
        }

        $info['node']['bundles'][$bundle]['properties'][$machine_name] = [
          // Try to create a readable label.
          'label' => ucwords(str_replace(['.', '_'], ' ', $machine_name)),
          'description' => (isset($details['description'])) ? $details['description'] : '',
          'type' => ($details['type'] == 'varchar') ? 'text' : $details['type'],
          'schema field' => '[' . $machine_name . ']',
          // Generic getter able to retrieve the value of any chado field.
          'getter callback' => 'tripal_search_chado_token_getter_callback',
        ];
      }

      // Add the base foreign keys. This allows searching for, e.g., all features
      // of a given organism. Each foreign key also gets a single property that
      // spans content types so it can be exposed as a facet.
      foreach ($foreign_keys as $table => $fk_details) {
        if (!isset($fk_details['columns']) || !is_array($fk_details['columns'])) {
          continue;
        }
        foreach ($fk_details['columns'] as $left_field => $right_field) {
          $machine_name = $base_table . '.' . $left_field;
          $field_details = isset($fields[$left_field]) ? $fields[$left_field] : [];
          $description = (isset($field_details['description'])) ? $field_details['description'] : '';

          // Fallback labels, used when the column is not named "<something>_id".
          // $key_label must be set on every iteration: otherwise it is either
          // undefined or silently inherited from the previous foreign key.
          $label = $table . ' (' . $machine_name . ')';
          $key_label = ucwords(str_replace('_', ' ', $left_field));
          if (preg_match('/(\w+)_id/', $left_field, $matches)) {
            // Key only field.
            $key_label = ucwords(str_replace('_', ' ', $matches[1]));
            // Expanded field.
            $label = ucwords(str_replace('_', ' ', $base_table) . ' ' . str_replace('_', ' ', $matches[1]));
          }

          // First, the key version. It is defined on the node entity rather
          // than on the bundle so that all node types share it, which makes it
          // usable as a facet. The "BASE" placeholder is swapped for the actual
          // base table by the getter callback.
          $keytoken = '[BASE.' . $left_field . '>' . $table . '.' . $right_field . ']';
          $info['node']['properties'][$table . '.' . $right_field . ' key'] = [
            'label' => $key_label . ' (All Content Types)',
            'description' => $description,
            'type' => 'text',
            'schema field' => chado_node_get_readable_format($keytoken),
            // Generic getter able to retrieve values for any chado foreign key.
            'getter callback' => 'tripal_search_chado_token_across_nodetypes_getter_callback',
          ];

          // Second, a more readable version that will be tokenized so users can
          // search for fruitfly and get all features with that as an organism.
          $pretoken = '[' . $base_table . '.' . $left_field . '>' . $table . '.' . $right_field . ']';
          $info['node']['bundles'][$bundle]['properties'][$machine_name . ' expanded'] = [
            'label' => $label . ' (Expanded)',
            'description' => $description,
            'type' => 'text',
            'schema field' => chado_node_get_readable_format($pretoken),
            // Generic getter able to retrieve values for any chado foreign key.
            'getter callback' => 'tripal_search_chado_token_getter_callback',
          ];
        }
      }
    }

    // Provide our own hook for altering properties to make it easier for our users.
    drupal_alter('tripal_search_properties', $info);
  }
  /**
   * Allows tripal admin to alter entity property information after Tripal has
   * added its chado properties. This is currently being used to indicate chado
   * fields to be indexed for search.
   *
   * This hook is invoked through
   * drupal_alter('tripal_search_properties', $info) as the last step of
   * tripal_core_entity_property_info_alter().
   *
   * NOTE: If you simply need to add a field to be indexed, use
   * hook_search_include_chado_fields() which provides the much easier method of
   * simply listing fields to include.
   *
   * This function is most useful if you want to change the way the value is
   * retrieved (done by changing the 'getter callback') or add your own custom
   * computed field.
   *
   * @param $info
   *   The entity property information array, passed by reference so that
   *   implementations can change it in place.
   */
  function hook_tripal_search_properties_alter(&$info) {
  }
  /**
   * Entity property getter callback for chado token formats.
   *
   * A chado token format is a string containing chado tokens. Chado tokens are
   * expected to follow the format of the tokens auto-generated by
   * chado_node_generate_tokens(). For example, [feature.uniquename] stands for
   * the uniquename of a feature node, and [feature.organism_id>organism.species]
   * stands for the species of the organism of the feature node.
   *
   * The chado token format must be stored in the 'schema field' key of the
   * property definition for this getter to work.
   *
   * @param $data
   *   The entity object (in our case the node whose chado values are needed).
   * @param $options
   *   Not used by this callback.
   * @param $field_name
   *   The machine name of the entity property.
   * @param $type
   *   Not used by this callback.
   * @param $info
   *   The full property definition from entity property info.
   *
   * @return
   *   A string representing the "value" of the field, or NULL when $data has no
   *   nid, the property does not belong to this node type, or the format could
   *   not be determined.
   */
  function tripal_search_chado_token_getter_callback($data, $options, $field_name, $type, $info) {
    // Without a node id there is nothing to look up.
    if (!isset($data->nid)) {
      return NULL;
    }

    // The token format lives in the 'schema field' of the property definition.
    if (!isset($info['schema field'])) {
      tripal_report_error(
        'tripal_search',
        TRIPAL_ERROR,
        'Unable to get value for :field because the schema field was not set.',
        [':field' => $field_name]
      );
      return NULL;
    }
    $token_format = $info['schema field'];

    // The first token names the base table and the field: [table.field...
    $pieces = [];
    if (!preg_match('/\[(\w+)\.(\w+)/', $token_format, $pieces)) {
      // Not able to determine table?
      tripal_report_error(
        'tripal_search',
        TRIPAL_ERROR,
        'Unable to extract the base table from the format (:format) for :field because it didn\'t match the expected format: [tablename.field...',
        [':field' => $field_name, ':format' => $token_format]
      );
      return NULL;
    }
    list(, $chado_table, $chado_field) = $pieces;

    // Nodes of every type get asked for properties that were defined for one
    // specific node type (ie: bundle), so make sure this node actually carries
    // the base table record before going any further.
    if (!isset($data->{$chado_table})) {
      return NULL;
    }

    return tripal_core_get_token_value_for_property($chado_table, $chado_field, $token_format, $data, $info);
  }
  /**
   * Implements a getter callback for foreign keys common between content types.
   *
   * The property definition's 'schema field' holds a chado token format in which
   * the base table is written as the placeholder "BASE". The placeholder is
   * replaced with the base table of the node being processed before the tokens
   * are resolved.
   *
   * Properties using this getter are attached to the node entity as a whole, so
   * this callback is also asked for values of non-chado nodes. That is expected
   * and simply yields NULL without logging anything.
   *
   * @param $data
   *   The entity object (in our case the node whose chado values are needed).
   * @param $options
   *   Not used by this callback.
   * @param $field_name
   *   The machine name for the entity property.
   * @param $type
   *   Not used by this callback.
   * @param $info
   *   The full property definition from entity property info.
   *
   * @return
   *   A string representing the "value" of the field, or NULL when $data is not
   *   a chado node or the property definition has no 'schema field'.
   */
  function tripal_search_chado_token_across_nodetypes_getter_callback($data, $options, $field_name, $type, $info) {
    if (!isset($data->nid) || !isset($data->type)) {
      return NULL;
    }

    // First, make sure this is a chado node.
    // Assumption #1: All chado node types are prefixed with chado_
    if (!preg_match('/^chado_(\w+)/', $data->type, $matches)) {
      // Not an error: this property exists for every node type.
      return NULL;
    }

    if (!isset($info['schema field'])) {
      tripal_report_error(
        'tripal_search',
        TRIPAL_ERROR,
        'Unable to get value for :field because the schema field was not set.',
        [':field' => $field_name]
      );
      return NULL;
    }

    // Assumption #2: The base table is the suffix of the node type.
    $base_table = $matches[1];

    // Substitute in the base table for "BASE" in the schema field.
    $format = str_replace('BASE', $base_table, $info['schema field']);

    // Replace all tokens for values and return the result.
    return tripal_core_get_token_value_for_property($base_table, $field_name, $format, $data, $info);
  }
  /**
   * Retrieve values for all tokens for an entity property getter function.
   *
   * Every [token] found in $format is looked up with chado_get_token_value() and
   * substituted into the format string.
   *
   * @param $base_table
   *   The chado base table, passed on as the 'table' of each token.
   * @param $field_name
   *   The field name, passed on as the 'field' of each token and used in log
   *   messages.
   * @param $format
   *   The chado token format, e.g. "[feature.uniquename]".
   * @param $data
   *   The entity object handed to chado_get_token_value().
   * @param $info
   *   The property definition; its 'label' and 'description' are passed on with
   *   each token when present.
   *
   * @return
   *   The format with each token replaced by its value; the unchanged format
   *   when it contains no tokens; or NULL when none of the tokens had a value.
   */
  function tripal_core_get_token_value_for_property($base_table, $field_name, $format, $data, $info) {
    // Determine which tokens were used in the format string.
    $found = [];
    if (!preg_match_all('/\[[^]]+\]/', $format, $found)) {
      tripal_report_error(
        'tripal_search',
        TRIPAL_NOTICE,
        'Returned static text for :field since there were no tokens of a recognized format in the supplied format: :format',
        [':field' => $field_name, ':format' => $format]
      );
      return $format;
    }

    // str_replace() substitutes every occurrence at once, so a token repeated
    // in the format only needs to be looked up a single time.
    $used_tokens = array_unique($found[0]);

    $label = isset($info['label']) ? $info['label'] : '';
    $description = isset($info['description']) ? $info['description'] : '';

    // Get the value of each token.
    $any_value = FALSE;
    foreach ($used_tokens as $token) {
      $token_info = [
        'name' => $label,
        'table' => $base_table,
        'field' => $field_name,
        'token' => $token,
        'description' => $description,
        'location' => chado_node_get_location_from_token($token),
      ];
      $value = chado_get_token_value($token_info, $data, ['supress_errors' => TRUE]);

      // Only NULL, FALSE, the empty string and an empty array count as "no
      // value". In particular 0 and '0' are real values that must be indexed.
      $is_missing = ($value === NULL || $value === FALSE || $value === '' || $value === []);
      if (!$is_missing) {
        $any_value = TRUE;
      }

      // And sub it in to the format.
      $format = str_replace($token, $value, $format);
    }

    // If none of the tokens had values then this node doesn't have this field.
    // As such we return null so the search api doesn't bother indexing an
    // empty format.
    if (!$any_value) {
      return NULL;
    }

    return $format;
  }
  /**
   * Implements hook_modules_enabled().
   *
   * This hook is called when ANY module is enabled. This allows us to update
   * the search api "Default node index" when any Tripal module is enabled thus
   * allowing us to catch new node types right after they're created.
   *
   * Whether the index may actually be modified is decided by
   * tripal_search_update_default_index(), which only touches an index that is
   * disabled and has no server and otherwise logs a notice. This hook therefore
   * only checks that the Search API is available and that the index exists.
   *
   * @param $modules
   *   The names of the modules that were enabled. Not used.
   */
  function tripal_core_modules_enabled($modules) {
    if (!module_exists('search_api')) {
      return;
    }

    $index_enabled = db_query(
      'SELECT enabled FROM search_api_index WHERE machine_name=:name',
      [':name' => 'default_node_index']
    )->fetchField();

    // fetchField() yields FALSE when there is no such index. Any other result
    // (enabled or not) means the index exists, so hand over to the updater.
    // Requiring the index to be enabled here would make the update impossible,
    // since the updater only modifies a disabled index.
    if ($index_enabled !== FALSE) {
      tripal_search_update_default_index();
    }
  }
  /**
   * Adds the chado fields to the Search API "Default node index".
   *
   * The Search API provides a default node index which has a number of
   * node-specific fields enabled by default. We want to ensure our chado fields
   * are also enabled by default thus making for easier enabling of Tripal
   * search.
   *
   * This function should be called whenever new nodes might have been added to
   * ensure that their fields are added as well.
   *
   * The index is only modified while it is disabled and has no server assigned.
   * That way we ensure we don't override user changes! In any other state a
   * notice is logged instead.
   *
   * @return
   *   FALSE when the default node index cannot be found or loaded; nothing
   *   otherwise.
   */
  function tripal_search_update_default_index() {
    // First we need the index object for the "Default node index".
    $index_id = db_query(
      'SELECT id FROM search_api_index WHERE machine_name=:name',
      [':name' => 'default_node_index']
    )->fetchField();
    if (!$index_id) {
      return FALSE;
    }
    $index = search_api_index_load($index_id);
    if (!is_object($index)) {
      // Without an index object there is nothing we could safely edit.
      return FALSE;
    }

    // We only want to update the index if it is both disabled and has no server
    // indicated. That way we can be reasonably sure that it has been untouched
    // by admin users.
    if (!($index->enabled == FALSE && $index->server == NULL)) {
      tripal_report_error(
        'tripal_search',
        TRIPAL_NOTICE,
        'The Search API "Default Node Index" was not updated with Tripal Fields. If you would like to enable more Tripal/Chado fields to be indexed, edit the Field Listing for the "Default Node Index" now.'
      );
      return;
    }

    // Start from the options (and thus the fields) the index already has.
    $changes = ['options' => $index->options];

    // Information about all the fields available to nodes is stored as
    // properties of nodes so we'll grab that.
    $info = entity_get_property_info('node');
    $bundles = (isset($info['bundles']) && is_array($info['bundles'])) ? $info['bundles'] : [];

    // Loop through each node type and add all the properties of the chado node
    // types.
    // Assumption #1: We are assuming that all chado node types are prefixed 'chado_'.
    foreach ($bundles as $node_type => $details) {
      if (!preg_match('/^chado_/', $node_type)) {
        continue;
      }
      // A chado node type may come without any bundle specific properties.
      if (empty($details['properties']) || !is_array($details['properties'])) {
        continue;
      }

      // Add each chado field to the index but only if it is not already added.
      foreach ($details['properties'] as $field_name => $field_details) {
        if (isset($changes['options']['fields'][$field_name])) {
          continue;
        }
        $changes['options']['fields'][$field_name]['type'] = ($field_details['type'] == 'varchar') ? 'text' : $field_details['type'];

        // Furthermore if this is a name then we want to add a boost to ensure
        // it carries more weight in the search results.
        if (preg_match('/name/', $field_name)) {
          $changes['options']['fields'][$field_name]['boost'] = '3.0';
        }
      }
    }

    // We also want to enable highlighting to ensure an excerpt is generated
    // since this will be used in the default search view distributed with Tripal.
    if (!isset($index->options['processors']['search_api_highlighting'])) {
      $changes['options']['processors']['search_api_highlighting'] = [
        'status' => 1,
        'weight' => 35,
        'settings' => [
          'prefix' => '<strong>',
          'suffix' => '</strong>',
          'excerpt' => 1,
          'excerpt_length' => 256,
          'exclude_fields' => [],
          'highlight' => 'always',
        ],
      ];
    }
    else {
      // Keep the existing highlighting settings; just switch it and the
      // excerpt on.
      $changes['options']['processors']['search_api_highlighting']['status'] = 1;
      $changes['options']['processors']['search_api_highlighting']['settings']['excerpt'] = 1;
    }

    // Finally we save all of our changes :-).
    search_api_index_edit($index_id, $changes);
    drupal_set_message('The Search API "Default Node Index" was updated.');
  }