tripal_chado.api.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. <?php
  2. /**
  3. * @file
  4. *
  5. * This file provides the Chado API: a set of functions for
  6. * interacting with Chado.
  7. */
  8. /**
  9. * @defgroup tripal_chado_api Chado
  10. *
  11. * @ingroup tripal_api
  12. * The Tripal Chado API: a set of functions for
  13. * interacting with Chado.
  14. *
  15. */
  16. /**
  17. * Publishes content in Chado as a new TripalEntity entity.
  18. *
  19. * @param $values
  20. * A key/value associative array that supports the following keys:
  21. * - bundle_name: The name of the the TripalBundle (e.g. bio_data-12345).
  22. * @param $job_id
  23. * (Optional) The numeric job ID as provided by the Tripal jobs system. There
  24. * is no need to specify this argument if this function is being called
  25. * outside of the jobs systems.
  26. *
  27. * @return boolean
  28. * TRUE if all of the records of the given bundle type were published, and
  29. * FALSE if a failure occured.
  30. *
  31. * @ingroup tripal_chado_api
  32. */
  33. function chado_publish_records($values, $job_id = NULL) {
  34. // We want the job object in order to report progress.
  35. if (is_object($job_id)) {
  36. $job = $job_id;
  37. }
  38. if (is_numeric($job_id)) {
  39. $job = new TripalJob();
  40. $job->load($job_id);
  41. }
  42. $report_progress = FALSE;
  43. if (is_object($job)) {
  44. $report_progress = TRUE;
  45. }
  46. // Make sure we have the required options: bundle_name.
  47. if (!array_key_exists('bundle_name', $values) or !$values['bundle_name']) {
  48. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  49. "Could not publish record: @error",
  50. array('@error' => 'The bundle name must be provided'));
  51. return FALSE;
  52. }
  53. // Get the incoming arguments from the $values array.
  54. $bundle_name = $values['bundle_name'];
  55. $filters = array_key_exists('filters', $values) ? $values['filters'] : array();
  56. $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';
  57. // Load the bundle entity so we can get information about which Chado
  58. // table/field this entity belongs to.
  59. $bundle = tripal_load_bundle_entity(array('name' => $bundle_name));
  60. if (!$bundle) {
  61. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  62. "Unknown bundle. Could not publish record: @error",
  63. array('@error' => 'The bundle name must be provided'));
  64. return FALSE;
  65. }
  66. $chado_entity_table = chado_get_bundle_entity_table($bundle);
  67. // Get the mapping of the bio data type to the Chado table.
  68. $chado_bundle = db_select('chado_bundle', 'cb')
  69. ->fields('cb')
  70. ->condition('bundle_id', $bundle->id)
  71. ->execute()
  72. ->fetchObject();
  73. if(!$chado_bundle) {
  74. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  75. "Cannot find mapping of bundle to Chado tables. Could not publish record.");
  76. return FALSE;
  77. }
  78. $table = $chado_bundle->data_table;
  79. $type_column = $chado_bundle->type_column;
  80. $type_linker_table = $chado_bundle->type_linker_table;
  81. $cvterm_id = $chado_bundle->type_id;
  82. $type_value = $chado_bundle->type_value;
  83. // Get the table information for the Chado table.
  84. $table_schema = chado_get_schema($table);
  85. $pkey_field = $table_schema['primary key'][0];
  86. // Construct the SQL for identifying which records should be published.
  87. $args = array();
  88. $select = "SELECT T.$pkey_field as record_id ";
  89. $from = "
  90. FROM {" . $table . "} T
  91. LEFT JOIN [" . $chado_entity_table . "] CE on CE.record_id = T.$pkey_field
  92. ";
  93. // For migration of Tripal v2 nodes to entities we want to include the
  94. // coresponding chado linker table.
  95. if ($sync_node && db_table_exists('chado_' . $table)) {
  96. $select = "SELECT T.$pkey_field as record_id, CT.nid ";
  97. $from .= " INNER JOIN [chado_" . $table . "] CT ON CT.$pkey_field = T.$pkey_field";
  98. }
  99. $where = " WHERE CE.record_id IS NULL ";
  100. // Handle records that are mapped to property tables.
  101. if ($type_linker_table and $type_column and $type_value) {
  102. $propschema = chado_get_schema($type_linker_table);
  103. $fkeys = $propschema['foreign keys'][$table]['columns'];
  104. foreach ($fkeys as $leftkey => $rightkey) {
  105. if ($rightkey == $pkey_field) {
  106. $from .= " INNER JOIN {" . $type_linker_table . "} LT ON T.$pkey_field = LT.$leftkey ";
  107. }
  108. }
  109. $where .= "AND LT.$type_column = :cvterm_id and LT.value = :prop_value";
  110. $args[':cvterm_id'] = $cvterm_id;
  111. $args[':prop_value'] = $type_value;
  112. }
  113. // Handle records that are mapped to cvterm linking tables.
  114. if ($type_linker_table and $type_column and !$type_value) {
  115. $cvtschema = chado_get_schema($type_linker_table);
  116. $fkeys = $cvtschema['foreign keys'][$table]['columns'];
  117. foreach ($fkeys as $leftkey => $rightkey) {
  118. if ($rightkey == $pkey_field) {
  119. $from .= " INNER JOIN {" . $type_linker_table . "} LT ON T.$pkey_field = LT.$leftkey ";
  120. }
  121. }
  122. $where .= "AND LT.$type_column = :cvterm_id";
  123. $args[':cvterm_id'] = $cvterm_id;
  124. }
  125. // Handle records that are mapped via a type_id column in the base table.
  126. if (!$type_linker_table and $type_column) {
  127. $where .= "AND T.$type_column = :cvterm_id";
  128. $args[':cvterm_id'] = $cvterm_id;
  129. }
  130. // Handle the case where records are in the cvterm table and mapped via a single
  131. // vocab. Here we use the type_value for the cv_id.
  132. if ($table == 'cvterm' and $type_value) {
  133. $where .= "AND T.cv_id = :cv_id";
  134. $args[':cv_id'] = $type_value;
  135. }
  136. // Handle the case where records are in the cvterm table but we want to
  137. // use all of the child terms.
  138. if ($table == 'cvterm' and !$type_value) {
  139. $where .= "AND T.cvterm_id IN (
  140. SELECT CVTP.subject_id
  141. FROM {cvtermpath} CVTP
  142. WHERE CVTP.object_id = :cvterm_id)
  143. ";
  144. $args[':cvterm_id'] = $cvterm_id;
  145. }
  146. // Now add in any additional filters
  147. $fields = field_info_field_map();
  148. foreach ($fields as $field_name => $details) {
  149. if (array_key_exists('TripalEntity', $details['bundles']) and
  150. in_array($bundle_name, $details['bundles']['TripalEntity']) and
  151. in_array($field_name, array_keys($filters))){
  152. $instance = field_info_instance('TripalEntity', $field_name, $bundle_name);
  153. $chado_table = $instance['settings']['chado_table'];
  154. $chado_column = $instance['settings']['chado_column'];
  155. if ($chado_table == $table) {
  156. $where .= " AND T.$chado_column = :$field_name";
  157. $args[":$field_name"] = $filters[$field_name];
  158. }
  159. }
  160. }
  161. // First get the count
  162. $sql = "SELECT count(*) as num_records " . $from . $where;
  163. $result = chado_query($sql, $args);
  164. $count = $result->fetchField();
  165. // calculate the interval for updates
  166. $interval = intval($count / 50);
  167. if ($interval < 1) {
  168. $interval = 1;
  169. }
  170. // Perform the query.
  171. $sql = $select . $from . $where;
  172. $records = chado_query($sql, $args);
  173. $transaction = db_transaction();
  174. print "\nNOTE: publishing records is performed using a database transaction. \n" .
  175. "If the load fails or is terminated prematurely then the entire set of \n" .
  176. "is rolled back with no changes to the database\n\n";
  177. $i = 0;
  178. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, 0, number_format(memory_get_usage()));
  179. try {
  180. while($record = $records->fetchObject()) {
  181. // update the job status every interval
  182. if ($i % $interval == 0) {
  183. $complete = ($i / $count) * 33.33333333;
  184. // Currently don't support setting job progress within a transaction.
  185. // if ($report_progress) { $job->setProgress(intval($complete * 3)); }
  186. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
  187. }
  188. // First save the tripal_entity record.
  189. $record_id = $record->record_id;
  190. $ec = entity_get_controller('TripalEntity');
  191. $entity = $ec->create(array(
  192. 'bundle' => $bundle_name,
  193. 'term_id' => $bundle->term_id,
  194. // Add in the Chado details for when the hook_entity_create()
  195. // is called and our tripal_chado_entity_create() implementation
  196. // can deal with it.
  197. 'chado_record' => chado_generate_var($table, array($pkey_field => $record_id)),
  198. 'chado_record_id' => $record_id,
  199. ));
  200. $entity = $entity->save();
  201. if (!$entity) {
  202. throw new Exception('Could not create entity.');
  203. }
  204. // Next save the chado entity record.
  205. $entity_record = array(
  206. 'entity_id' => $entity->id,
  207. 'record_id' => $record_id,
  208. );
  209. // For the Tv2 to Tv3 migration we want to add the nid to the
  210. // entity so we can associate the node with the entity.
  211. if (property_exists($record, 'nid')) {
  212. $entity_record['nid'] = $record->nid;
  213. }
  214. $result = db_insert($chado_entity_table)
  215. ->fields($entity_record)
  216. ->execute();
  217. if(!$result){
  218. throw new Exception('Could not create mapping of entity to Chado record.');
  219. }
  220. $i++;
  221. }
  222. }
  223. catch (Exception $e) {
  224. $transaction->rollback();
  225. $error = $e->getMessage();
  226. tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not publish record: @error", array('@error' => $error));
  227. drupal_set_message('Failed publishing record. See recent logs for more details.', 'error');
  228. return FALSE;
  229. }
  230. drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
  231. return TRUE;
  232. }
  233. /**
  234. * Returns an array of tokens based on Tripal Entity Fields.
  235. *
  236. * @param $base_table
  237. * The name of a base table in Chado.
  238. * @return
  239. * An array of tokens where the key is the machine_name of the token.
  240. *
  241. * @ingroup tripal_chado_api
  242. */
  243. function chado_get_tokens($base_table) {
  244. $tokens = array();
  245. $table_descrip = chado_get_schema($base_table);
  246. foreach ($table_descrip['fields'] as $field_name => $field_details) {
  247. $token = '[' . $base_table . '.' . $field_name . ']';
  248. $location = implode(' > ',array($base_table, $field_name));
  249. $tokens[$token] = array(
  250. 'name' => ucwords(str_replace('_',' ',$base_table)) . ': ' . ucwords(str_replace('_',' ',$field_name)),
  251. 'table' => $base_table,
  252. 'field' => $field_name,
  253. 'token' => $token,
  254. 'description' => array_key_exists('description', $field_details) ? $field_details['description'] : '',
  255. 'location' => $location
  256. );
  257. if (!array_key_exists('description', $field_details) or preg_match('/TODO/',$field_details['description'])) {
  258. $tokens[$token]['description'] = 'The '.$field_name.' field of the '.$base_table.' table.';
  259. }
  260. }
  261. // RECURSION:
  262. // Follow the foreign key relationships recursively
  263. if (array_key_exists('foreign keys', $table_descrip)) {
  264. foreach ($table_descrip['foreign keys'] as $table => $details) {
  265. foreach ($details['columns'] as $left_field => $right_field) {
  266. $sub_token_prefix = $base_table . '.' . $left_field;
  267. $sub_location_prefix = implode(' > ',array($base_table, $left_field));
  268. $sub_tokens = chado_get_tokens($table);
  269. if (is_array($sub_tokens)) {
  270. $tokens = array_merge($tokens, $sub_tokens);
  271. }
  272. }
  273. }
  274. }
  275. return $tokens;
  276. }
  277. /**
  278. * Replace all Chado Tokens in a given string.
  279. *
  280. * NOTE: If there is no value for a token then the token is removed.
  281. *
  282. * @param string $string
  283. * The string containing tokens.
  284. * @param $record
  285. * A Chado record as generated by chado_generate_var()
  286. *
  287. * @return
  288. * The string will all tokens replaced with values.
  289. *
  290. * @ingroup tripal_chado_api
  291. */
  292. function chado_replace_tokens($string, $record) {
  293. // Get the list of tokens
  294. $tokens = chado_get_tokens($record->tablename);
  295. // Determine which tokens were used in the format string
  296. if (preg_match_all('/\[[^]]+\]/', $string, $used_tokens)) {
  297. // Get the value for each token used
  298. foreach ($used_tokens[0] as $token) {
  299. $token_info = $tokens[$token];
  300. if (!empty($token_info)) {
  301. $table = $token_info['table'];
  302. $var = $record;
  303. $value = '';
  304. // Iterate through each portion of the location string. An example string
  305. // might be: stock > type_id > name.
  306. $location = explode('>', $token_info['location']);
  307. foreach ($location as $index) {
  308. $index = trim($index);
  309. // if $var is an object then it is the $node object or a table
  310. // that has been expanded.
  311. if (is_object($var)) {
  312. // check to see if the index is a member of the object. If so,
  313. // then reset the $var to this value.
  314. if (property_exists($var, $index)) {
  315. $value = $var->$index;
  316. }
  317. }
  318. // if the $var is an array then there are multiple instances of the same
  319. // table in a FK relationship (e.g. relationship tables)
  320. elseif (is_array($var)) {
  321. $value = $var[$index];
  322. }
  323. else {
  324. tripal_report_error('tripal_chado', TRIPAL_WARNING,
  325. 'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
  326. 'to access \'%index\' for the following: \'%var\'.',
  327. array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
  328. );
  329. }
  330. }
  331. $string = str_replace($token, $value, $string);
  332. }
  333. }
  334. }
  335. return $string;
  336. }