tripal_chado.api.inc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. <?php
  2. /**
  3. * @file
  4. *
  5. * This file contains miscellaneous API functions specific to working with
  6. * records in Chado that do not have a home in any other sub category of
  7. * API functions.
  8. */
  9. /**
  10. * @defgroup tripal_chado_api Chado
  11. *
  12. * @ingroup tripal_api
  13. * The Tripal Chado API is a set of functions for interacting with data
  14. * inside of a Chado relational database. Entities (or pages) in Drupal
  15. * that are provided by Tripal can supply data from any supported database
  16. * back-end, and Chado is the default. This API contains a variety of sub
  17. * categories (or groups) where functions are organized. Any extension module
  18. * that desires to work with data in Chado will find these functions useful.
  19. */
  20. /**
  21. * Publishes content in Chado as a new TripalEntity entity.
  22. *
  23. * @param $values
  24. * A key/value associative array that supports the following keys:
  25. * - bundle_name: The name of the the TripalBundle (e.g. bio_data-12345).
  26. * @param $job
  27. * The jobs management object for the job if this function is run as a job.
  28. * This argument is added by Tripal during a job run and is not needed if
  29. * this function is run directly.
  30. *
  31. * @return boolean
  32. * TRUE if all of the records of the given bundle type were published, and
  33. * FALSE if a failure occured.
  34. *
  35. * @ingroup tripal_chado_api
  36. */
  37. function chado_publish_records($values, $job = NULL) {
  38. // Used for adding runtime to the progress report.
  39. $started_at = microtime(TRUE);
  40. // We want the job object in order to report progress with the job.
  41. if (is_numeric($job)) {
  42. $job_id = $job;
  43. $job = new TripalJob();
  44. $job->load($job_id);
  45. }
  46. $report_progress = TRUE;
  47. if (!is_object($job)) {
  48. $report_progress = FALSE;
  49. }
  50. // These are options for the tripal_report_error function. We do not
  51. // want to log messages to the watchdog but we do for the job and to
  52. // the terminal
  53. $message_type = 'publish_records';
  54. $message_opts = [
  55. 'watchdog' == FALSE,
  56. 'job' => $job,
  57. 'print' => TRUE,
  58. ];
  59. // Start an array for caching objects to save performance.
  60. $cache = [];
  61. // Make sure we have the required options: bundle_name.
  62. if (!array_key_exists('bundle_name', $values) or !$values['bundle_name']) {
  63. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  64. "Could not publish record: @error",
  65. ['@error' => 'The bundle name must be provided'], $message_opts);
  66. return FALSE;
  67. }
  68. // Get the incoming arguments from the $values array.
  69. $bundle_name = $values['bundle_name'];
  70. $filters = array_key_exists('filters', $values) ? $values['filters'] : [];
  71. $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';
  72. // We want to break the number of records to publish into chunks in order to ensure
  73. // transactions do not run for too long (performance issue). The number of records
  74. // to be processed per chunk is set here:
  75. $chunk_size = 500;
  76. // Load the bundle entity so we can get information about which Chado
  77. // table/field this entity belongs to.
  78. $bundle = tripal_load_bundle_entity(['name' => $bundle_name]);
  79. $cache['bundle'] = $bundle;
  80. if (!$bundle) {
  81. tripal_report_error($message_type, TRIPAL_ERROR,
  82. "Unknown bundle. Could not publish record: @error",
  83. ['@error' => 'The bundle name must be provided'], $message_opts);
  84. return FALSE;
  85. }
  86. $chado_entity_table = chado_get_bundle_entity_table($bundle);
  87. // Get the mapping of the bio data type to the Chado table.
  88. $chado_bundle = db_select('chado_bundle', 'cb')
  89. ->fields('cb')
  90. ->condition('bundle_id', $bundle->id)
  91. ->execute()
  92. ->fetchObject();
  93. if (!$chado_bundle) {
  94. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  95. "Cannot find mapping of bundle to Chado tables. Could not publish record.");
  96. return FALSE;
  97. }
  98. // Load the term for use in setting the alias for each entity created.
  99. $term = entity_load('TripalTerm', ['id' => $bundle->term_id]);
  100. $cache['term'] = $term;
  101. $table = $chado_bundle->data_table;
  102. $type_column = $chado_bundle->type_column;
  103. $type_linker_table = $chado_bundle->type_linker_table;
  104. $cvterm_id = $chado_bundle->type_id;
  105. $type_value = $chado_bundle->type_value;
  106. // Get the table information for the Chado table.
  107. $table_schema = chado_get_schema($table);
  108. $pkey_field = $table_schema['primary key'][0];
  109. // Construct the SQL for identifying which records should be published.
  110. $args = [];
  111. $select = "SELECT T.$pkey_field as record_id ";
  112. $from = "
  113. FROM {" . $table . "} T
  114. LEFT JOIN [" . $chado_entity_table . "] CE on CE.record_id = T.$pkey_field
  115. ";
  116. // For migration of Tripal v2 nodes to entities we want to include the
  117. // coresponding chado linker table.
  118. if ($sync_node && db_table_exists('chado_' . $table)) {
  119. $select = "SELECT T.$pkey_field as record_id, CT.nid ";
  120. $from .= " INNER JOIN [chado_" . $table . "] CT ON CT.$pkey_field = T.$pkey_field";
  121. }
  122. $where = " WHERE CE.record_id IS NULL ";
  123. // Handle records that are mapped to property tables.
  124. if ($type_linker_table and $type_column and $type_value) {
  125. $propschema = chado_get_schema($type_linker_table);
  126. $fkeys = $propschema['foreign keys'][$table]['columns'];
  127. foreach ($fkeys as $leftkey => $rightkey) {
  128. if ($rightkey == $pkey_field) {
  129. $from .= " INNER JOIN {" . $type_linker_table . "} LT ON T.$pkey_field = LT.$leftkey ";
  130. }
  131. }
  132. $where .= "AND LT.$type_column = :cvterm_id and LT.value = :prop_value";
  133. $args[':cvterm_id'] = $cvterm_id;
  134. $args[':prop_value'] = $type_value;
  135. }
  136. // Handle records that are mapped to cvterm linking tables.
  137. if ($type_linker_table and $type_column and !$type_value) {
  138. $cvtschema = chado_get_schema($type_linker_table);
  139. $fkeys = $cvtschema['foreign keys'][$table]['columns'];
  140. foreach ($fkeys as $leftkey => $rightkey) {
  141. if ($rightkey == $pkey_field) {
  142. $from .= " INNER JOIN {" . $type_linker_table . "} LT ON T.$pkey_field = LT.$leftkey ";
  143. }
  144. }
  145. $where .= "AND LT.$type_column = :cvterm_id";
  146. $args[':cvterm_id'] = $cvterm_id;
  147. }
  148. // Handle records that are mapped via a type_id column in the base table.
  149. if (!$type_linker_table and $type_column) {
  150. $where .= "AND T.$type_column = :cvterm_id";
  151. $args[':cvterm_id'] = $cvterm_id;
  152. }
  153. // Handle the case where records are in the cvterm table and mapped via a single
  154. // vocab. Here we use the type_value for the cv_id.
  155. if ($table == 'cvterm' and $type_value) {
  156. $where .= "AND T.cv_id = :cv_id";
  157. $args[':cv_id'] = $type_value;
  158. }
  159. // Handle the case where records are in the cvterm table but we want to
  160. // use all of the child terms.
  161. if ($table == 'cvterm' and !$type_value) {
  162. $where .= "AND T.cvterm_id IN (
  163. SELECT CVTP.subject_id
  164. FROM {cvtermpath} CVTP
  165. WHERE CVTP.object_id = :cvterm_id)
  166. ";
  167. $args[':cvterm_id'] = $cvterm_id;
  168. }
  169. // Now add in any additional filters
  170. $fields = field_info_field_map();
  171. foreach ($fields as $field_name => $details) {
  172. if (array_key_exists('TripalEntity', $details['bundles']) and
  173. in_array($bundle_name, $details['bundles']['TripalEntity']) and
  174. in_array($field_name, array_keys($filters))) {
  175. $instance = field_info_instance('TripalEntity', $field_name, $bundle_name);
  176. $chado_table = $instance['settings']['chado_table'];
  177. $chado_column = $instance['settings']['chado_column'];
  178. if ($chado_table == $table) {
  179. $where .= " AND T.$chado_column = :$field_name";
  180. $args[":$field_name"] = $filters[$field_name];
  181. }
  182. }
  183. }
  184. // First get the count
  185. // @performance optimize, estimate or remove this. It's only used for reporting progress on the command-line.
  186. $sql = "SELECT count(*) as num_records " . $from . $where;
  187. $result = chado_query($sql, $args);
  188. $count = $result->fetchField();
  189. tripal_report_error($message_type, TRIPAL_INFO,
  190. "There are !count records to publish.",
  191. ['!count' => $count], $message_opts);
  192. if ($report_progress) {
  193. $job->setTotalItems($count);
  194. $job->setItemsHandled(0);
  195. $job->setInterval(1);
  196. }
  197. // Perform the query in chunks.
  198. $sql = $select . $from . $where . ' LIMIT ' . $chunk_size;
  199. $more_records_to_publish = TRUE;
  200. $num_actually_published = 0; // the full number of records published.
  201. $i = 0; //reset on each chunk.
  202. while ($more_records_to_publish) {
  203. $records = chado_query($sql, $args);
  204. // There is no need to cache transactions since Drupal handles nested
  205. // transactions "by performing no transactional operations (as far as the
  206. // database sees) within the inner nesting layers". Effectively, Drupal
  207. // ensures nested trasactions work the same as passing a transaction
  208. // through to the deepest level and not starting a new transaction if we
  209. // are already in one.
  210. $transaction = db_transaction();
  211. try {
  212. // Keep track of how many we've published so far
  213. // before clearing our chunk,
  214. $num_actually_published = $num_actually_published + $i;
  215. $i = 0;
  216. while ($record = $records->fetchObject()) {
  217. // First save the tripal_entity record.
  218. // @performace This is likely a bottleneck. Too bad we can't create
  219. // multiple entities at once... sort of like the copy method.
  220. $record_id = $record->record_id;
  221. $ec = entity_get_controller('TripalEntity');
  222. $entity = $ec->create([
  223. 'bundle' => $bundle_name,
  224. 'term_id' => $bundle->term_id,
  225. // Add in the Chado details for when the hook_entity_create()
  226. // is called and our tripal_chado_entity_create() implementation
  227. // can deal with it.
  228. 'chado_record' => chado_generate_var($table, [$pkey_field => $record_id], ['include_fk' => 0]),
  229. 'chado_record_id' => $record_id,
  230. 'publish' => TRUE,
  231. 'bundle_object' => $bundle,
  232. ]);
  233. $entity = $entity->save($cache);
  234. if (!$entity) {
  235. throw new Exception('Could not create entity.');
  236. }
  237. // Next save the chado entity record.
  238. $entity_record = [
  239. 'entity_id' => $entity->id,
  240. 'record_id' => $record_id,
  241. ];
  242. // For the Tv2 to Tv3 migration we want to add the nid to the
  243. // entity so we can associate the node with the entity.
  244. if (property_exists($record, 'nid')) {
  245. $entity_record['nid'] = $record->nid;
  246. }
  247. $result = db_insert($chado_entity_table)
  248. ->fields($entity_record)
  249. ->execute();
  250. if (!$result) {
  251. throw new Exception('Could not create mapping of entity to Chado record.');
  252. }
  253. $i++;
  254. if ($report_progress) {
  255. $job->setItemsHandled($i);
  256. }
  257. }
  258. } catch (Exception $e) {
  259. $transaction->rollback();
  260. $error = $e->getMessage();
  261. tripal_report_error($message_type, TRIPAL_ERROR, "Could not publish record: @error", ['@error' => $error]);
  262. drupal_set_message('Failed publishing record. See recent logs for more details.', 'error');
  263. return FALSE;
  264. }
  265. // If we get through the loop and haven't completed 100 records, then
  266. // we're done!
  267. if ($i < $chunk_size) {
  268. $more_records_to_publish = FALSE;
  269. }
  270. // Commit our current chunk.
  271. unset($transaction);
  272. }
  273. // Add the last number published to our total.
  274. $num_actually_published = $num_actually_published + $i;
  275. tripal_report_error($message_type, TRIPAL_INFO,
  276. "Successfully published !count !type record(s).",
  277. ['!count' => $num_actually_published, '!type' => $bundle->label], $message_opts);
  278. return TRUE;
  279. }
  280. /**
  281. * Returns an array of tokens based on Tripal Entity Fields.
  282. *
  283. * @param $base_table
  284. * The name of a base table in Chado.
  285. *
  286. * @return
  287. * An array of tokens where the key is the machine_name of the token.
  288. *
  289. * @ingroup tripal_chado_api
  290. */
  291. function chado_get_tokens($base_table) {
  292. $tokens = [];
  293. $table_descrip = chado_get_schema($base_table);
  294. foreach ($table_descrip['fields'] as $field_name => $field_details) {
  295. $token = '[' . $base_table . '.' . $field_name . ']';
  296. $location = implode(' > ', [$base_table, $field_name]);
  297. $tokens[$token] = [
  298. 'name' => ucwords(str_replace('_', ' ', $base_table)) . ': ' . ucwords(str_replace('_', ' ', $field_name)),
  299. 'table' => $base_table,
  300. 'field' => $field_name,
  301. 'token' => $token,
  302. 'description' => array_key_exists('description', $field_details) ? $field_details['description'] : '',
  303. 'location' => $location,
  304. ];
  305. if (!array_key_exists('description', $field_details) or preg_match('/TODO/', $field_details['description'])) {
  306. $tokens[$token]['description'] = 'The ' . $field_name . ' field of the ' . $base_table . ' table.';
  307. }
  308. }
  309. // RECURSION:
  310. // Follow the foreign key relationships recursively
  311. if (array_key_exists('foreign keys', $table_descrip)) {
  312. foreach ($table_descrip['foreign keys'] as $table => $details) {
  313. foreach ($details['columns'] as $left_field => $right_field) {
  314. $sub_token_prefix = $base_table . '.' . $left_field;
  315. $sub_location_prefix = implode(' > ', [$base_table, $left_field]);
  316. $sub_tokens = chado_get_tokens($table);
  317. if (is_array($sub_tokens)) {
  318. $tokens = array_merge($tokens, $sub_tokens);
  319. }
  320. }
  321. }
  322. }
  323. return $tokens;
  324. }
  325. /**
  326. * Replace all Chado Tokens in a given string.
  327. *
  328. * NOTE: If there is no value for a token then the token is removed.
  329. *
  330. * @param string $string
  331. * The string containing tokens.
  332. * @param $record
  333. * A Chado record as generated by chado_generate_var()
  334. *
  335. * @return
  336. * The string will all tokens replaced with values.
  337. *
  338. * @ingroup tripal_chado_api
  339. */
  340. function chado_replace_tokens($string, $record) {
  341. // Get the list of tokens
  342. $tokens = chado_get_tokens($record->tablename);
  343. // Determine which tokens were used in the format string
  344. if (preg_match_all('/\[[^]]+\]/', $string, $used_tokens)) {
  345. // Get the value for each token used
  346. foreach ($used_tokens[0] as $token) {
  347. $token_info = $tokens[$token];
  348. if (!empty($token_info)) {
  349. $table = $token_info['table'];
  350. $var = $record;
  351. $value = '';
  352. // Iterate through each portion of the location string. An example string
  353. // might be: stock > type_id > name.
  354. $location = explode('>', $token_info['location']);
  355. foreach ($location as $index) {
  356. $index = trim($index);
  357. // if $var is an object then it is the $node object or a table
  358. // that has been expanded.
  359. if (is_object($var)) {
  360. // check to see if the index is a member of the object. If so,
  361. // then reset the $var to this value.
  362. if (property_exists($var, $index)) {
  363. $value = $var->$index;
  364. }
  365. }
  366. // if the $var is an array then there are multiple instances of the same
  367. // table in a FK relationship (e.g. relationship tables)
  368. elseif (is_array($var)) {
  369. $value = $var[$index];
  370. }
  371. else {
  372. tripal_report_error('tripal_chado', TRIPAL_WARNING,
  373. 'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
  374. 'to access \'%index\' for the following: \'%var\'.',
  375. [
  376. '%token' => $token,
  377. '%index' => $index,
  378. '%var' => print_r($var, TRUE),
  379. ]
  380. );
  381. }
  382. }
  383. $string = str_replace($token, $value, $string);
  384. }
  385. }
  386. }
  387. return $string;
  388. }