<?php
// tripal_chado.api.inc (9.9 KB)
  /**
   * Retrieves an entity that matches the given table and record id.
   *
   * @param $table
   *   The name of the Chado table.
   * @param $record_id
   *   The record's primary key in the table specified by $table.
   *
   * @return
   *   The entity loaded through entity_load('TripalEntity', ...) for the
   *   matching chado_entity row, or NULL when no chado_entity row matches or
   *   the referenced entity cannot be loaded.
   */
  function tripal_load_chado_entity($table, $record_id) {
    // Use the shared lookup so both API functions resolve a record to its
    // entity ID with the same query.
    $entity_id = tripal_get_chado_entity_id($table, $record_id);
    if (!$entity_id) {
      return NULL;
    }

    // entity_load() hands back an array of entities. An empty (or FALSE)
    // result must not reach reset(), which would return FALSE instead of the
    // NULL this function uses for "not found".
    $entities = entity_load('TripalEntity', array($entity_id));
    if (empty($entities)) {
      return NULL;
    }
    return reset($entities);
  }
  /**
   * Looks up the ID of the entity linked to a Chado record.
   *
   * @param $data_table
   *   The name of the Chado table, matched against chado_entity.data_table.
   * @param $record_id
   *   The record's ID, matched against chado_entity.record_id.
   *
   * @return
   *   The value of the entity_id column of the first matching chado_entity
   *   row, as returned by fetchField().
   */
  function tripal_get_chado_entity_id($data_table, $record_id) {
    $lookup = db_select('chado_entity', 'ce');
    $lookup->fields('ce', array('entity_id'));
    $lookup->condition('data_table', $data_table);
    $lookup->condition('record_id', $record_id);

    $matches = $lookup->execute();
    return $matches->fetchField();
  }
  /**
   * Publishes content in Chado as a new TripalEntity entity.
   *
   * Every record of the Chado table mapped to the bundle's term that has no
   * row in the chado_entity table yet gets a new TripalEntity plus a
   * chado_entity row linking the two. All writes run inside one database
   * transaction which is rolled back as soon as a single record fails.
   *
   * @param $values
   *   A key/value associative array that supports the following keys:
   *   - bundle_name: (Required) The name of the TripalBundle
   *     (e.g. bio_data-12345).
   *   - sync_node: (Optional) When set and a chado_[table] linker table
   *     exists, only records present in that linker table are published and
   *     the linker's nid is stored with the chado_entity row. This supports
   *     the migration of Tripal v2 nodes to entities.
   * @param $job_id
   *   (Optional) The numeric job ID as provided by the Tripal jobs system.
   *   There is no need to specify this argument if this function is being
   *   called outside of the jobs system. It is accepted for compatibility but
   *   not used here: progress is only printed to standard output.
   *
   * @return boolean
   *   TRUE if all of the records of the given bundle type were published, and
   *   FALSE if a failure occurred.
   */
  function tripal_chado_publish_records($values, $job_id = NULL) {

    // Make sure we have the required options: bundle_name.
    if (!array_key_exists('bundle_name', $values) or !$values['bundle_name']) {
      return _tripal_chado_publish_records_fail('The bundle name must be provided');
    }
    $bundle_name = $values['bundle_name'];
    $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';

    // Load the bundle entity so we can get information about which Chado
    // table/field this entity belongs to. Each lookup is checked so a missing
    // bundle, term or mapping is reported instead of failing on a property
    // read of a non-object.
    $bundle = tripal_load_bundle_entity(array('name' => $bundle_name));
    if (!$bundle) {
      return _tripal_chado_publish_records_fail("The bundle '$bundle_name' could not be loaded");
    }
    $term = tripal_load_term_entity(array('term_id' => $bundle->term_id));
    if (!$term) {
      return _tripal_chado_publish_records_fail("The term for bundle '$bundle_name' could not be loaded");
    }
    $vocab = $term->vocab;
    if (!$vocab) {
      return _tripal_chado_publish_records_fail("The vocabulary for bundle '$bundle_name' could not be determined");
    }
    $mapped_table = chado_get_cvterm_mapping(array(
      'vocabulary' => $vocab->vocabulary,
      'accession' => $term->accession,
    ));
    if (!$mapped_table) {
      return _tripal_chado_publish_records_fail("The bundle '$bundle_name' is not mapped to a Chado table");
    }
    $table = $mapped_table->chado_table;
    $column = $mapped_table->chado_field;
    $cvterm_id = $mapped_table->cvterm->cvterm_id;

    // Get the table information for the Chado table.
    $table_schema = chado_get_schema($table);
    if (empty($table_schema['primary key'][0])) {
      return _tripal_chado_publish_records_fail("The primary key of the Chado table '$table' could not be determined");
    }
    $pkey_field = $table_schema['primary key'][0];

    // Construct the SQL for identifying which records should be published:
    // those rows of the Chado table without a matching chado_entity row.
    // Values are bound as arguments; only identifiers are interpolated.
    $args = array(':data_table' => $table);
    $select = "SELECT T.$pkey_field as record_id ";
    $from = "
      FROM {" . $table . "} T
        LEFT JOIN public.chado_entity CE on CE.record_id = T.$pkey_field
        AND CE.data_table = :data_table
    ";

    // For migration of Tripal v2 nodes to entities we want to include the
    // corresponding chado linker table.
    if ($sync_node && db_table_exists('chado_' . $table)) {
      $select = "SELECT T.$pkey_field as record_id, CT.nid ";
      $from .= "INNER JOIN public.chado_$table CT ON CT.$pkey_field = T.$pkey_field";
    }
    $where = " WHERE CE.record_id IS NULL ";

    // TODO: here we have hard-coded the analysis and organism tables, but
    // there may be more that don't have a type field. I think if the $column
    // value is null then that check is good enough, but it needs to be looked
    // at.
    if ($table != 'analysis' and $table != 'organism') {
      $where .= "AND T.$column = :cvterm_id";
      $args[':cvterm_id'] = $cvterm_id;
    }

    // First get the count.
    $sql = "SELECT count(*) as num_records " . $from . $where;
    $count = chado_query($sql, $args)->fetchField();

    // Calculate the interval for progress updates (about 1000 updates in
    // total, but at least one per record for small sets).
    $interval = intval($count / 1000);
    if ($interval < 1) {
      $interval = 1;
    }

    // Perform the query.
    $sql = $select . $from . $where;
    $records = chado_query($sql, $args);

    $transaction = db_transaction();

    // The counter must exist before the first progress line is printed.
    $i = 0;
    printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, 0, number_format(memory_get_usage()));
    try {
      // The controller does not change between records, so fetch it once.
      $ec = entity_get_controller('TripalEntity');

      while ($record = $records->fetchObject()) {

        // Print the progress every interval. The count is guarded because it
        // comes from a separate query than the record set.
        if ($i % $interval == 0) {
          $percent = $count > 0 ? ($i / $count) * 100 : 0;
          printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $percent, number_format(memory_get_usage()));
        }

        // First save the tripal_entity record.
        $record_id = $record->record_id;
        $entity = $ec->create(array(
          'bundle' => $bundle_name,
          'term_id' => $bundle->term_id,
          'chado_record' => chado_generate_var($table, array($pkey_field => $record_id)),
          'chado_record_id' => $record_id,
        ));
        $entity = $entity->save();
        if (!$entity) {
          throw new Exception('Could not create entity.');
        }

        // Next save the chado_entity record.
        $entity_record = array(
          'entity_id' => $entity->id,
          'record_id' => $record_id,
          'data_table' => $table,
          'type_table' => $table,
          'field' => $column,
        );

        // For the Tv2 to Tv3 migration we want to add the nid to the
        // entity so we can associate the node with the entity.
        if (property_exists($record, 'nid')) {
          $entity_record['nid'] = $record->nid;
        }

        // A failed write would leave an entity without its link to Chado, so
        // treat it as a failure of the whole publish and roll back.
        $success = drupal_write_record('chado_entity', $entity_record);
        if (!$success) {
          throw new Exception('Could not link the entity to its Chado record.');
        }

        $i++;
      }
    }
    catch (Exception $e) {
      $transaction->rollback();
      $error = $e->getMessage();
      tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not publish record: @error", array('@error' => $error));
      drupal_set_message('Failed publishing record. See recent logs for more details.', 'error');
      return FALSE;
    }

    drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
    return TRUE;
  }

  /**
   * Reports a publishing precondition failure.
   *
   * Private helper for tripal_chado_publish_records().
   *
   * @param $message
   *   The reason the records could not be published.
   *
   * @return boolean
   *   Always FALSE, so callers can return the result directly.
   */
  function _tripal_chado_publish_records_fail($message) {
    tripal_report_error('tripal_chado', TRIPAL_ERROR,
      "Could not publish record: @error",
      array('@error' => $message));
    return FALSE;
  }
  /**
   * Returns an array of tokens based on Tripal Entity Fields.
   *
   * One token is produced for every field of the base table and, by following
   * the foreign keys in the table schema, for every field of each table that
   * is reachable from it. Tokens of related tables are keyed and located by
   * their own table (e.g. [cvterm.name], 'cvterm > name'), not by the path
   * that was followed from the base table.
   *
   * @param $base_table
   *   The name of a base table in Chado.
   *
   * @return
   *   An array of tokens where the key is the machine_name of the token
   *   ('[table.field]') and each value is an array with the keys: name,
   *   table, field, token, description and location. An empty array is
   *   returned when no schema is available for the table.
   */
  function tripal_get_chado_tokens($base_table) {
    return _tripal_get_chado_tokens_for_table($base_table, array());
  }

  /**
   * Collects the tokens of a table and of the tables it references.
   *
   * Private helper for tripal_get_chado_tokens().
   *
   * @param $base_table
   *   The name of the Chado table to build tokens for.
   * @param $ancestors
   *   An array keyed by the names of the tables on the current recursion
   *   path. Foreign keys pointing back at one of them are not followed, which
   *   keeps self-referencing or cyclic relationships from recursing forever.
   *
   * @return
   *   An array of tokens keyed by '[table.field]'.
   */
  function _tripal_get_chado_tokens_for_table($base_table, $ancestors) {
    $tokens = array();

    $table_descrip = chado_get_schema($base_table);
    if (!is_array($table_descrip)) {
      return $tokens;
    }

    if (!empty($table_descrip['fields']) and is_array($table_descrip['fields'])) {
      $table_label = ucwords(str_replace('_', ' ', $base_table));
      foreach ($table_descrip['fields'] as $field_name => $field_details) {
        $token = '[' . $base_table . '.' . $field_name . ']';

        // Use the schema's description unless it is missing or is still a
        // TODO placeholder, in which case a generic one is generated.
        $has_description = array_key_exists('description', $field_details);
        if (!$has_description or preg_match('/TODO/', (string) $field_details['description'])) {
          $description = 'The '.$field_name.' field of the '.$base_table.' table.';
        }
        else {
          $description = $field_details['description'];
        }

        $tokens[$token] = array(
          'name' => $table_label . ': ' . ucwords(str_replace('_', ' ', $field_name)),
          'table' => $base_table,
          'field' => $field_name,
          'token' => $token,
          'description' => $description,
          'location' => implode(' > ', array($base_table, $field_name)),
        );
      }
    }

    // RECURSION:
    // Follow the foreign key relationships recursively.
    if (!empty($table_descrip['foreign keys']) and is_array($table_descrip['foreign keys'])) {
      $ancestors[$base_table] = TRUE;
      foreach ($table_descrip['foreign keys'] as $table => $details) {
        // A relationship without columns contributes nothing.
        if (empty($details['columns'])) {
          continue;
        }
        // Do not walk back into a table that is already being processed.
        if (isset($ancestors[$table])) {
          continue;
        }
        // The tokens of the related table are the same no matter which
        // column references it, so the table is walked once rather than once
        // per column.
        $sub_tokens = _tripal_get_chado_tokens_for_table($table, $ancestors);
        $tokens = array_merge($tokens, $sub_tokens);
      }
    }

    return $tokens;
  }
  /**
   * Replace all Chado Tokens in a given string.
   *
   * NOTE: If there is no value for a token then the token is removed. Text in
   * square brackets that is not a known token for the record's table is left
   * in the string untouched.
   *
   * @param string $string
   *   The string containing tokens.
   * @param $record
   *   A Chado record as generated by chado_generate_var(). Its tablename
   *   property selects the set of tokens that can be replaced.
   *
   * @return
   *   The string with all tokens replaced with values. The string is returned
   *   unchanged when $record is not an object with a tablename.
   */
  function tripal_replace_chado_tokens($string, $record) {

    // Without a table name there is no token list to work with.
    if (!is_object($record) or empty($record->tablename)) {
      return $string;
    }

    // Get the list of tokens.
    $tokens = tripal_get_chado_tokens($record->tablename);

    // Determine which tokens were used in the format string.
    if (!preg_match_all('/\[[^]]+\]/', $string, $used_tokens)) {
      return $string;
    }

    // Get the value for each token used.
    foreach ($used_tokens[0] as $token) {

      // Skip bracketed text that is not one of the known tokens.
      if (empty($tokens[$token])) {
        continue;
      }
      $token_info = $tokens[$token];
      $value = '';

      // Iterate through each portion of the location string. An example
      // string might be: stock > name. Every portion is looked up on the
      // record itself and the last portion that is a property of the record
      // supplies the value.
      foreach (explode('>', $token_info['location']) as $index) {
        $index = trim($index);
        if (property_exists($record, $index)) {
          $value = $record->$index;
        }
      }

      // Only scalar values can be placed in the string. NULL and expanded
      // values (objects or arrays) count as "no value", so the token is
      // removed.
      if (!is_scalar($value)) {
        $value = '';
      }
      $string = str_replace($token, (string) $value, $string);
    }

    return $string;
  }