tripal_chado.api.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. <?php
  2. /**
  3. * Publishes content in Chado as a new TripalEntity entity.
  4. *
  5. * @param $values
  6. * A key/value associative array that supports the following keys:
  7. * - bundle_name: The name of the the TripalBundle (e.g. bio_data-12345).
  8. * @param $job_id
  9. * (Optional) The numeric job ID as provided by the Tripal jobs system. There
  10. * is no need to specify this argument if this function is being called
  11. * outside of the jobs systems.
  12. *
  13. * @return boolean
  14. * TRUE if all of the records of the given bundle type were published, and
  15. * FALSE if a failure occured.
  16. */
  17. function tripal_chado_publish_records($values, $job_id = NULL) {
  18. // Make sure we have the required options: bundle_name.
  19. if (!array_key_exists('bundle_name', $values) or !$values['bundle_name']) {
  20. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  21. "Could not publish record: @error",
  22. array('@error' => 'The bundle name must be provided'));
  23. return FALSE;
  24. }
  25. // Get the incoming arguments from the $values array.
  26. $bundle_name = $values['bundle_name'];
  27. $filters = array_key_exists('filters', $values) ? $values['filters'] : array();
  28. $sync_node = array_key_exists('sync_node', $values) ? $values['sync_node'] : '';
  29. // Load the bundle entity so we can get information about which Chado
  30. // table/field this entity belongs to.
  31. $bundle = tripal_load_bundle_entity(array('name' => $bundle_name));
  32. if (!$bundle) {
  33. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  34. "Unknown bundle. Could not publish record: @error",
  35. array('@error' => 'The bundle name must be provided'));
  36. return FALSE;
  37. }
  38. $chado_entity_table = tripal_chado_get_bundle_entity_table($bundle);
  39. // Get the mapping of the bio data type to the Chado table.
  40. $chado_bundle = db_select('chado_bundle', 'cb')
  41. ->fields('cb')
  42. ->condition('bundle_id', $bundle->id)
  43. ->execute()
  44. ->fetchObject();
  45. if(!$chado_bundle) {
  46. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  47. "Cannot find mapping of bundle to Chado tables. Could not publish record.");
  48. return FALSE;
  49. }
  50. $table = $chado_bundle->data_table;
  51. $type_column = $chado_bundle->type_column;
  52. $type_linker_table = $chado_bundle->type_linker_table;
  53. $cvterm_id = $chado_bundle->type_id;
  54. // Get the table information for the Chado table.
  55. $table_schema = chado_get_schema($table);
  56. $pkey_field = $table_schema['primary key'][0];
  57. // Construct the SQL for identifying which records should be published.
  58. $args = array();
  59. $select = "SELECT $pkey_field as record_id ";
  60. $from = "
  61. FROM {" . $table . "} T
  62. LEFT JOIN public.$chado_entity_table CE on CE.record_id = T.$pkey_field
  63. ";
  64. // For migration of Tripal v2 nodes to entities we want to include the
  65. // coresponding chado linker table.
  66. if ($sync_node && db_table_exists('chado_' . $table)) {
  67. $select = "SELECT T.$pkey_field as record_id, CT.nid ";
  68. $from .= " INNER JOIN public.chado_$table CT ON CT.$pkey_field = T.$pkey_field";
  69. }
  70. $where = " WHERE CE.record_id IS NULL ";
  71. // Handle bundles that use a linker property table for identifying the type
  72. // of record to publish.
  73. if ($type_linker_table and $type_column) {
  74. $propschema = chado_get_schema($type_linker_table);
  75. $fkeys = $propschema['foreign keys'][$table]['columns'];
  76. foreach ($fkeys as $leftkey => $rightkey) {
  77. if ($rightkey == $pkey_field) {
  78. $from .= " INNER JOIN {" . $type_linker_table . "} LT ON T.$pkey_field = LT.$leftkey ";
  79. }
  80. }
  81. $where .= "AND LT.$type_column = :cvterm_id";
  82. $args[':cvterm_id'] = $cvterm_id;
  83. }
  84. // If the type column is in the base table then add in the SQL for that.
  85. if (!$type_linker_table and $type_column) {
  86. $where .= "AND T.$type_column = :cvterm_id";
  87. $args[':cvterm_id'] = $cvterm_id;
  88. }
  89. // If no type column is specified then we have a problem.
  90. if ($type_linker_table and !$type_column) {
  91. tripal_report_error('tripal_chado', TRIPAL_ERROR,
  92. "Could not publish record: @error",
  93. array('@error' => 'The bundle does not properly map to Chado.'));
  94. return FALSE;
  95. }
  96. // Now add in any additional filters
  97. $fields = field_info_field_map();
  98. foreach ($fields as $field_name => $details) {
  99. if (array_key_exists('TripalEntity', $details['bundles']) and
  100. in_array($bundle_name, $details['bundles']['TripalEntity']) and
  101. in_array($field_name, array_keys($filters))){
  102. $instance = field_info_instance('TripalEntity', $field_name, $bundle_name);
  103. $chado_table = $instance['settings']['chado_table'];
  104. $chado_column = $instance['settings']['chado_column'];
  105. if ($chado_table == $table) {
  106. $where .= " AND T.$chado_column = :$field_name";
  107. $args[":$field_name"] = $filters[$field_name];
  108. }
  109. }
  110. }
  111. // First get the count
  112. $sql = "SELECT count(*) as num_records " . $from . $where;
  113. $result = chado_query($sql, $args);
  114. $count = $result->fetchField();
  115. // calculate the interval for updates
  116. $interval = intval($count / 1000);
  117. if ($interval < 1) {
  118. $interval = 1;
  119. }
  120. // Perform the query.
  121. $sql = $select . $from . $where;
  122. $records = chado_query($sql, $args);
  123. $transaction = db_transaction();
  124. print "\nNOTE: publishing records is performed using a database transaction. \n" .
  125. "If the load fails or is terminated prematurely then the entire set of \n" .
  126. "is rolled back with no changes to the database\n\n";
  127. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, 0, number_format(memory_get_usage()));
  128. try {
  129. $i = 0;
  130. while($record = $records->fetchObject()) {
  131. // update the job status every interval
  132. //if ($jobid and $i % $interval == 0) {
  133. $complete = ($i / $count) * 33.33333333;
  134. //tripal_set_job_progress($jobid, intval($complete + 33.33333333));
  135. printf("%d of %d records. (%0.2f%%) Memory: %s bytes\r", $i, $count, $complete * 3, number_format(memory_get_usage()));
  136. //}
  137. // First save the tripal_entity record.
  138. $record_id = $record->record_id;
  139. $ec = entity_get_controller('TripalEntity');
  140. $entity = $ec->create(array(
  141. 'bundle' => $bundle_name,
  142. 'term_id' => $bundle->term_id,
  143. // Add in the Chaod details for when the hook_entity_create()
  144. // is called and our tripal_cahdo_entity_create() implementation
  145. // can deal with it.
  146. 'chado_record' => chado_generate_var($table, array($pkey_field => $record_id)),
  147. 'chado_record_id' => $record_id,
  148. ));
  149. $entity = $entity->save();
  150. if (!$entity) {
  151. throw new Exception('Could not create entity.');
  152. }
  153. // Next save the chado entity record.
  154. $entity_record = array(
  155. 'entity_id' => $entity->id,
  156. 'record_id' => $record_id,
  157. );
  158. // For the Tv2 to Tv3 migration we want to add the nid to the
  159. // entity so we can associate the node with the entity.
  160. if (property_exists($record, 'nid')) {
  161. $entity_record['nid'] = $record->nid;
  162. }
  163. $result = db_insert($chado_entity_table)
  164. ->fields($entity_record)
  165. ->execute();
  166. if(!$result){
  167. throw new Exception('Could not create mapping of entity to Chado record.');
  168. }
  169. $i++;
  170. }
  171. }
  172. catch (Exception $e) {
  173. $transaction->rollback();
  174. $error = $e->getMessage();
  175. tripal_report_error('tripal_chado', TRIPAL_ERROR, "Could not publish record: @error", array('@error' => $error));
  176. drupal_set_message('Failed publishing record. See recent logs for more details.', 'error');
  177. return FALSE;
  178. }
  179. drupal_set_message("Succesfully published $i " . $bundle->label . " record(s).");
  180. return TRUE;
  181. }
  182. /**
  183. * Returns an array of tokens based on Tripal Entity Fields.
  184. *
  185. * @param $base_table
  186. * The name of a base table in Chado.
  187. * @return
  188. * An array of tokens where the key is the machine_name of the token.
  189. */
  190. function tripal_get_chado_tokens($base_table) {
  191. $tokens = array();
  192. $table_descrip = chado_get_schema($base_table);
  193. foreach ($table_descrip['fields'] as $field_name => $field_details) {
  194. $token = '[' . $base_table . '.' . $field_name . ']';
  195. $location = implode(' > ',array($base_table, $field_name));
  196. $tokens[$token] = array(
  197. 'name' => ucwords(str_replace('_',' ',$base_table)) . ': ' . ucwords(str_replace('_',' ',$field_name)),
  198. 'table' => $base_table,
  199. 'field' => $field_name,
  200. 'token' => $token,
  201. 'description' => array_key_exists('description', $field_details) ? $field_details['description'] : '',
  202. 'location' => $location
  203. );
  204. if (!array_key_exists('description', $field_details) or preg_match('/TODO/',$field_details['description'])) {
  205. $tokens[$token]['description'] = 'The '.$field_name.' field of the '.$base_table.' table.';
  206. }
  207. }
  208. // RECURSION:
  209. // Follow the foreign key relationships recursively
  210. if (array_key_exists('foreign keys', $table_descrip)) {
  211. foreach ($table_descrip['foreign keys'] as $table => $details) {
  212. foreach ($details['columns'] as $left_field => $right_field) {
  213. $sub_token_prefix = $base_table . '.' . $left_field;
  214. $sub_location_prefix = implode(' > ',array($base_table, $left_field));
  215. $sub_tokens = tripal_get_chado_tokens($table);
  216. if (is_array($sub_tokens)) {
  217. $tokens = array_merge($tokens, $sub_tokens);
  218. }
  219. }
  220. }
  221. }
  222. return $tokens;
  223. }
  224. /**
  225. * Replace all Chado Tokens in a given string.
  226. *
  227. * NOTE: If there is no value for a token then the token is removed.
  228. *
  229. * @param string $string
  230. * The string containing tokens.
  231. * @param $record
  232. * A Chado record as generated by chado_generate_var()
  233. *
  234. * @return
  235. * The string will all tokens replaced with values.
  236. */
  237. function tripal_replace_chado_tokens($string, $record) {
  238. // Get the list of tokens
  239. $tokens = tripal_get_chado_tokens($record->tablename);
  240. // Determine which tokens were used in the format string
  241. if (preg_match_all('/\[[^]]+\]/', $string, $used_tokens)) {
  242. // Get the value for each token used
  243. foreach ($used_tokens[0] as $token) {
  244. $token_info = $tokens[$token];
  245. if (!empty($token_info)) {
  246. $table = $token_info['table'];
  247. $var = $record;
  248. $value = '';
  249. // Iterate through each portion of the location string. An example string
  250. // might be: stock > type_id > name.
  251. $location = explode('>', $token_info['location']);
  252. foreach ($location as $index) {
  253. $index = trim($index);
  254. // if $var is an object then it is the $node object or a table
  255. // that has been expanded.
  256. if (is_object($var)) {
  257. // check to see if the index is a member of the object. If so,
  258. // then reset the $var to this value.
  259. if (property_exists($var, $index)) {
  260. $value = $var->$index;
  261. }
  262. }
  263. // if the $var is an array then there are multiple instances of the same
  264. // table in a FK relationship (e.g. relationship tables)
  265. elseif (is_array($var)) {
  266. $value = $var[$index];
  267. }
  268. else {
  269. tripal_report_error('tripal_chado', TRIPAL_WARNING,
  270. 'Tokens: Unable to determine the value of %token. Things went awry when trying ' .
  271. 'to access \'%index\' for the following: \'%var\'.',
  272. array('%token' => $token, '%index' => $index, '%var' => print_r($var,TRUE))
  273. );
  274. }
  275. }
  276. $string = str_replace($token, $value, $string);
  277. }
  278. }
  279. }
  280. return $string;
  281. }
  282. /**
  283. * Retrieve an entity_id for a chado record.
  284. *
  285. * @param string $chado_table
  286. * The chado_table where the record is stored.
  287. * @param integer $record_id
  288. * The record_id which is the primary key of the chado table.
  289. *
  290. * @return
  291. * Return an integer representing the entity_id or NULL if not found. The
  292. * record is then accessible on the website through URL /bio_data/<entity_id>
  293. */
  294. function tripal_get_chado_entity_id ($chado_table, $record_id) {
  295. // To find the bundle_table, check if type_column is used for the chado table.
  296. $type_column =
  297. db_select('chado_bundle', 'CB')
  298. ->fields('CB', array('type_column'))
  299. ->condition('data_table', $chado_table)
  300. ->execute()
  301. ->fetchField();
  302. // If there is a type_column, get bundle_id by specifying the data_table,
  303. // type_column, and type_id.
  304. $bundle_id = NULL;
  305. if ($type_column) {
  306. $schema = chado_get_schema($chado_table);
  307. $pkey = is_array($schema['primary key']) ? $schema['primary key'][0] : $schema['primary key'];
  308. $type_id = NULL;
  309. if (key_exists($type_column, $schema['fields'])) {
  310. $type_id =
  311. chado_db_select($chado_table, 'C')
  312. ->fields('C', array($type_column))
  313. ->condition($pkey, $record_id)
  314. ->execute()
  315. ->fetchField();
  316. }
  317. if ($type_id) {
  318. $bundle_id =
  319. db_select('chado_bundle', 'CB')
  320. ->fields('CB', array('bundle_id'))
  321. ->condition('data_table', $chado_table)
  322. ->condition('type_column', $type_column)
  323. ->condition('type_id', $type_id)
  324. ->execute()
  325. ->fetchField();
  326. }
  327. }
  328. // If type_column is not used, get bundle_id by specifying the data_table.
  329. else {
  330. $bundle_id =
  331. db_select('chado_bundle', 'CB')
  332. ->fields('CB', array('bundle_id'))
  333. ->condition('data_table', $chado_table)
  334. ->execute()
  335. ->fetchField();
  336. }
  337. // If bundle_id is found, find the bundle table name and return the entity_id.
  338. $entity_id = NULL;
  339. if ($bundle_id) {
  340. $table_name =
  341. db_select('tripal_bundle', 'TB')
  342. ->fields('TB', array('name'))
  343. ->condition('id', $bundle_id)
  344. ->execute()
  345. ->fetchField();
  346. $entity_id =
  347. db_select('chado_' . $table_name, 'CBD')
  348. ->fields('CBD', array('entity_id'))
  349. ->condition('record_id', $record_id)
  350. ->execute()
  351. ->fetchField();
  352. }
  353. return $entity_id;
  354. }