tripal_core.chado_nodes.api.inc 46 KB


  1. <?php
  2. /**
  3. * @file
  4. * API to handle much of the common functionality implemented when creating a drupal node type.
  5. */
  6. /**
  7. * @defgroup tripal_chado_node_api Chado Node API
  8. * @ingroup tripal_chado_api
  9. * @{
  10. * Many Tripal modules implement Drupal node types as a means of displaying chado
  11. * records individually through Drupal as a single web page. In order to do this, many of
  12. * the same drupal hooks are implemented and the code between modules is actually quite
  13. * similar. This API aims to abstract much of the common functionality in order to make
  14. * it easier for new Tripal modules to implement drupal node types and to centralize the
  15. * maintenance effort as much as possible.
  16. *
  17. * A generic sync form has been created. See chado_node_sync_form() for
  18. * instructions on how to implement this form in your module.
  19. *
  20. * Many of the base chado tables also have associated prop, _dbxref and _relationship
  21. * tables. Generic mini-forms have been created to help you handle these forms. To
  22. * implement this functionality you call the mini-form from your module node form and
  23. * then call the associated update functions from both your hook_insert and hook_update.
  24. * The functions of interest are as follows:
  25. * - chado_add_node_form_properties() and chado_update_node_form_properties()
  26. * to provide an interface for adding/removing properties
  27. * - chado_add_node_form_dbxrefs() and chado_update_node_form_dbxrefs()
  28. * to provide an interface for adding/removing additional database references
  29. * - chado_add_node_form_relationships() and chado_update_node_form_relationships()
  30. * to provide an interface for adding/removing relationships between chado records
  31. * from your base table
  32. * @}
  33. */
  /**
   * Get the chado id for a node.
   *
   * E.g., to get the 'analysis_id' from the 'analysis' table for a synced
   * 'chado_analysis' node (the same pattern applies to organisms and features):
   * @code
   *   $analysis_id = chado_get_id_from_nid('analysis', $node->nid);
   *   $organism_id = chado_get_id_from_nid('organism', $node->nid);
   *   $feature_id = chado_get_id_from_nid('feature', $node->nid);
   * @endcode
   *
   * @param $table
   *   The chado table the chado record is from. It is also used to build the
   *   name of the id column that is selected ($table . '_id').
   * @param $nid
   *   The value of the primary key of the node.
   * @param $linking_table
   *   The Drupal table linking the chado record to its node.
   *   This field is optional and defaults to chado_$table.
   *
   * @return
   *   The chado id of the associated chado record, as returned by fetchField()
   *   on the query result.
   *
   * @ingroup tripal_chado_node_api
   */
  function chado_get_id_from_nid($table, $nid, $linking_table = NULL) {
    if (empty($linking_table)) {
      $linking_table = 'chado_' . $table;
    }
    // The curly braces have to reach the database layer as literal characters
    // so that the table name can be prefixed. Writing "{$linking_table}" inside
    // a double-quoted string is PHP's complex interpolation syntax and silently
    // removes them, hence the explicit concatenation.
    $sql = "SELECT " . $table . "_id as id FROM {" . $linking_table . "} WHERE nid = :nid";
    return db_query($sql, array(':nid' => $nid))->fetchField();
  }
  /**
   * Get the node id for a chado record (feature, organism, analysis, ...).
   *
   * For example, to look up the node of an analysis, organism or feature:
   * @code
   *   $nid = chado_get_nid_from_id('analysis', $analysis_id);
   *   $nid = chado_get_nid_from_id('organism', $organism_id);
   *   $nid = chado_get_nid_from_id('feature', $feature_id);
   * @endcode
   *
   * @param $table
   *   The chado table the id is from.
   * @param $id
   *   The value of the primary key from the $table chado table
   *   (ie: feature_id).
   * @param $linking_table
   *   The Drupal table linking the chado record to its node.
   *   This field is optional and defaults to chado_$table.
   *
   * @return
   *   The nid of the associated node, as returned by fetchField() on the
   *   query result.
   *
   * @ingroup tripal_chado_node_api
   */
  function chado_get_nid_from_id($table, $id, $linking_table = NULL) {
    $link = empty($linking_table) ? 'chado_' . $table : $linking_table;

    // The id column and its query placeholder are both derived from the name
    // of the chado table.
    $id_column = $table . '_id';
    $placeholder = ':' . $id_column;

    $sql = 'SELECT nid FROM {' . $link . '} WHERE ' . $id_column . ' = ' . $placeholder;
    $result = db_query($sql, array($placeholder => $id));
    return $result->fetchField();
  }
  /**
   * Determine the chado base table for a given content type.
   *
   * The base table is read from the 'chado_node_api' => 'base_table' entry that
   * a module adds to its hook_node_info() definition of the content type.
   *
   * @param $content_type
   *   The machine name of the content type (node type) you want to
   *   determine the base chado table of.
   * @param $module
   *   (Optional) The machine-name of the module implementing the
   *   content type. When given, only that module's hook_node_info() is
   *   consulted; otherwise all implementations are invoked.
   *
   * @return
   *   The name of the chado base table for the specified content type, or
   *   FALSE if the content type is unknown or does not declare a base table.
   *
   * @ingroup tripal_chado_node_api
   */
  function chado_node_get_base_table($content_type, $module = FALSE) {
    if ($module) {
      // Ask only the named module. hook_node_info() returns an array keyed by
      // content type, exactly like the merged result of module_invoke_all().
      $hook = $module . '_node_info';
      $node_types = function_exists($hook) ? call_user_func($hook) : array();
    }
    else {
      $node_types = module_invoke_all('node_info');
    }

    if (isset($node_types[$content_type]['chado_node_api']['base_table'])) {
      return $node_types[$content_type]['chado_node_api']['base_table'];
    }
    return FALSE;
  }
  125. /**
  126. * @section
  127. * Common Functionality for Properties, Dbxrefs and relationships chado node API
  128. */
  /**
   * Validate the triggering element from a node form.
   *
   * We inspect the post to determine what PHP knows is the triggering
   * element and if it doesn't agree with Drupal then we change it in Drupal.
   *
   * This fixes an obscure bug triggered when a property is added and then
   * a relationship removed: Drupal thinks the first property remove button was
   * clicked and instead removes a property (not a relationship) and renders the
   * new property table in the relationship table page space.
   *
   * NOTE: Many Drupal issues state that this problem is solved if the #name
   * of the button is unique (which it is in our case) but we are still
   * experiencing incorrectly determined triggering elements so we need to
   * handle it ourselves.
   *
   * @param array $form
   *   The form array (not used).
   * @param array $form_state
   *   The form state. $form_state['triggering_element']['#name'] is overwritten
   *   with the posted '_triggering_element_name' when the two differ.
   */
  function chado_validate_node_form_triggering_element($form, &$form_state) {
    // Without a posted element name there is nothing to compare against.
    // Overwriting the #name with NULL in that case would break every handler
    // that dispatches on the button name afterwards.
    if (!isset($_POST['_triggering_element_name']) || !is_string($_POST['_triggering_element_name'])) {
      return;
    }
    $posted_name = $_POST['_triggering_element_name'];

    if (!isset($form_state['triggering_element']['#name'])
      || $posted_name != $form_state['triggering_element']['#name']) {
      $form_state['triggering_element']['#name'] = $posted_name;
    }
  }
  /**
   * Validate adding subtable entries from the node forms.
   *
   * Supported subtables: Properties, Relationships, Additional DBxrefs.
   *
   * The leading run of lowercase letters in the triggering element's #name
   * selects which subtable-specific validator is called. Names that do not
   * start with 'properties', 'dbxrefs' or 'relationships' are ignored.
   *
   * @param array $form
   *   The form array, passed through to the subtable validator.
   * @param array $form_state
   *   The form state, passed by reference to the subtable validator.
   */
  function chado_add_node_form_subtables_add_button_validate($form, &$form_state) {
    // ASSUMPTION: each button has properties, dbxrefs or relationships as the
    // first part of its #name to uniquely identify the subsection.
    if (!preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $name_parts)) {
      return;
    }

    $validators = array(
      'properties' => 'chado_add_node_form_properties_add_button_validate',
      'dbxrefs' => 'chado_add_node_form_dbxrefs_add_button_validate',
      'relationships' => 'chado_add_node_form_relationships_add_button_validate',
    );
    $section = $name_parts[1];
    if (isset($validators[$section])) {
      $validator = $validators[$section];
      $validator($form, $form_state);
    }
  }
  /**
   * Add subtable entries to the node forms.
   *
   * Supported subtables: Properties, Relationships, Additional DBxrefs.
   *
   * The leading run of lowercase letters in the triggering element's #name
   * selects which subtable-specific submit function is called. The form is
   * always flagged for rebuild, whether or not a subtable matched.
   *
   * @param array $form
   *   The form array, passed through to the subtable submit function.
   * @param array $form_state
   *   The form state, passed by reference to the subtable submit function.
   */
  function chado_add_node_form_subtables_add_button_submit($form, &$form_state) {
    // ASSUMPTION: each button has properties, dbxrefs or relationships as the
    // first part of its #name to uniquely identify the subsection.
    $matched = preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $name_parts);
    $section = $matched ? $name_parts[1] : NULL;

    if ($section === 'properties') {
      chado_add_node_form_properties_add_button_submit($form, $form_state);
    }
    elseif ($section === 'dbxrefs') {
      chado_add_node_form_dbxrefs_add_button_submit($form, $form_state);
    }
    elseif ($section === 'relationships') {
      chado_add_node_form_relationships_add_button_submit($form, $form_state);
    }

    // Needed so that the form builder function is called again for the node
    // form; otherwise none of these changes would be seen.
    $form_state['rebuild'] = TRUE;
  }
  /**
   * Validate removing subtable entries from the node forms.
   *
   * Supported subtables: Properties, Relationships, Additional DBxrefs.
   *
   * Since removing isn't associated with any user input, the only thing we
   * need to validate is that Drupal has determined the triggering element
   * correctly. That said, each subtable's own remove validator is still called
   * in case there is some case-specific validation we have not anticipated.
   *
   * @param array $form
   *   The form array, passed through to the subtable validator.
   * @param array $form_state
   *   The form state, passed by reference to the subtable validator.
   */
  function chado_add_node_form_subtables_remove_button_validate($form, &$form_state) {
    // Drupal has known issues determining the triggering element correctly
    // when there are multiple buttons with the same label, so check it first.
    chado_validate_node_form_triggering_element($form, $form_state);

    // ASSUMPTION: each button has properties, dbxrefs or relationships as the
    // first part of its #name to uniquely identify the subsection.
    if (!preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $name_parts)) {
      return;
    }

    $validators = array(
      'properties' => 'chado_add_node_form_properties_remove_button_validate',
      'dbxrefs' => 'chado_add_node_form_dbxrefs_remove_button_validate',
      'relationships' => 'chado_add_node_form_relationships_remove_button_validate',
    );
    $section = $name_parts[1];
    if (isset($validators[$section])) {
      $validator = $validators[$section];
      $validator($form, $form_state);
    }
  }
  /**
   * Remove subtable entries from the node forms.
   *
   * Supported subtables: Properties, Relationships, Additional DBxrefs.
   *
   * The leading run of lowercase letters in the triggering element's #name
   * selects which subtable-specific submit function is called. The form is
   * always flagged for rebuild, whether or not a subtable matched.
   *
   * @param array $form
   *   The form array, passed through to the subtable submit function.
   * @param array $form_state
   *   The form state, passed by reference to the subtable submit function.
   */
  function chado_add_node_form_subtables_remove_button_submit($form, &$form_state) {
    $submit_handlers = array(
      'properties' => 'chado_add_node_form_properties_remove_button_submit',
      'dbxrefs' => 'chado_add_node_form_dbxrefs_remove_button_submit',
      'relationships' => 'chado_add_node_form_relationships_remove_button_submit',
    );

    // ASSUMPTION: each button has properties, dbxrefs or relationships as the
    // first part of its #name to uniquely identify the subsection.
    if (preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $name_parts)) {
      $section = $name_parts[1];
      if (isset($submit_handlers[$section])) {
        $handler = $submit_handlers[$section];
        $handler($form, $form_state);
      }
    }

    // Needed so that the form builder function is called again for the node
    // form; otherwise none of these changes would be seen.
    $form_state['rebuild'] = TRUE;
  }
  /**
   * Ajax function which returns the section of the form to be re-rendered
   * for either the properties, dbxrefs or relationships sub-sections.
   *
   * @param array $form
   *   The form array the sub-section table is taken from.
   * @param array $form_state
   *   The form state; its triggering element #name selects the sub-section.
   *
   * @return
   *   The table element of the matching sub-section. Nothing is returned when
   *   the button name does not start with a supported sub-section name.
   *
   * @ingroup tripal_core
   */
  function chado_add_node_form_subtable_ajax_update($form, &$form_state) {
    // Drupal has known issues determining the triggering element correctly
    // when there are multiple buttons with the same label, so check it first.
    chado_validate_node_form_triggering_element($form, $form_state);

    // Where each sub-section keeps its table inside the form: the fieldset key
    // followed by the table key.
    $table_locations = array(
      'properties' => array('properties', 'property_table'),
      'dbxrefs' => array('addtl_dbxrefs', 'dbxref_table'),
      'relationships' => array('relationships', 'relationship_table'),
    );

    // ASSUMPTION: each button has properties, dbxrefs or relationships as the
    // first part of its #name to uniquely identify the subsection.
    if (preg_match('/^([a-z]+).*/', $form_state['triggering_element']['#name'], $name_parts)) {
      $section = $name_parts[1];
      if (isset($table_locations[$section])) {
        list($fieldset_key, $table_key) = $table_locations[$section];
        return $form[$fieldset_key][$table_key];
      }
    }
  }
  301. /**
  302. * @section
  303. * Sync Form
  304. */
  305. /**
  306. * Generic Sync Form to aid in sync'ing (create drupal nodes linking to chado content)
  307. * any chado node type.
  308. *
  309. * To use this you need to add a call to it from your hook_menu() and
  310. * add some additional information to your hook_node_info(). The Following code gives an
  311. * example of how this might be done:
  312. * @code
  313. function modulename_menu() {
  314. // the machine name of your module
  315. $module_name = 'tripal_example';
  316. // the base specified in hook_node_info
  317. $linking_table = 'chado_example';
  318. // This menu item will be a tab on the admin/tripal/chado/tripal_example page
  319. // that is not selected by default
  320. $items['admin/tripal/chado/tripal_example/sync'] = array(
  321. 'title' => ' Sync',
  322. 'description' => 'Sync examples from Chado with Drupal',
  323. 'page callback' => 'drupal_get_form',
  324. 'page arguments' => array('chado_node_sync_form', $module_name, $linking_table),
  325. 'access arguments' => array('administer tripal examples'),
  326. 'type' => MENU_LOCAL_TASK,
  327. 'weight' => 0
  328. );
  329. return $items;
  330. }
  331. function modulename_node_info() {
  332. return array(
  333. 'chado_example' => array(
  334. 'name' => t('example'),
  335. 'base' => 'chado_example',
  336. 'description' => t('A Chado example is a collection of material that can be sampled and have experiments performed on it.'),
  337. 'has_title' => TRUE,
  338. 'locked' => TRUE,
  339. // this is what differs from the regular Drupal-documented hook_node_info()
  340. 'chado_node_api' => array(
  341. 'base_table' => 'example', // the name of the chado base table
  342. 'hook_prefix' => 'chado_example', // usually the name of the node type
  343. 'record_type_title' => array(
  344. 'singular' => t('Example'), // Singular human-readable title
  345. 'plural' => t('Examples') // Plural human-readable title
  346. ),
  347. 'sync_filters' => array( // filters for syncing
  348. 'type_id' => TRUE, // TRUE if there is an example.type_id field
  349. 'organism_id' => TRUE, // TRUE if there is an example.organism_id field
  350. 'checkboxes' => array('name') // If the 'checkboxes' key is present then the
  351. // value must be an array of column names in
  352. // base table. The values from these columns will
  353. // be retrieved, concatenated with a space delimiter
  354. // and provided in a list of checkboxes
  355. // for the user to choose which to sync.
  356. ),
  357. )
  358. ),
  359. );
  360. }
  361. * @endcode
  362. *
  363. * For more information on how you can override some of this behaviour while still
  364. * benefiting from as much of the common architecture as possible see the following
  365. * functions: hook_chado_node_sync_create_new_node(), hook_chado_node_sync_form(),
  366. * hook_chado_node_sync_select_query().
  367. *
  368. * @ingroup tripal_chado_node_api
  369. */
  function chado_node_sync_form($form, &$form_state) {
    $form = array();

    // The module name and the linking table arrive as the extra 'page
    // arguments' given to drupal_get_form(). Without both of them the
    // chado_node_api settings cannot be located, so stop early instead of
    // building a form from undefined values.
    $build_args = isset($form_state['build_info']['args']) ? $form_state['build_info']['args'] : array();
    if (!isset($build_args[0]) or !isset($build_args[1])) {
      drupal_set_message('The sync form requires both the module name and the linking table as page arguments.', 'error');
      return $form;
    }
    $module = $build_args[0];
    $linking_table = $build_args[1];

    $node_info_hook = $module . '_node_info';
    $node_info = function_exists($node_info_hook) ? call_user_func($node_info_hook) : array();
    if (!isset($node_info[$linking_table]['chado_node_api'])) {
      drupal_set_message('No chado_node_api settings were found in hook_node_info() for the requested linking table.', 'error');
      return $form;
    }
    $args = $node_info[$linking_table]['chado_node_api'];
    // The submit handler reads these settings back from the form state.
    $form_state['chado_node_api'] = $args;

    $sync_filters = (isset($args['sync_filters']) and is_array($args['sync_filters'])) ? $args['sync_filters'] : array();

    // Placeholder replacements shared by the translated strings below.
    $title_args = array(
      '%title_singular' => $args['record_type_title']['singular'],
      '%title_plural' => $args['record_type_title']['plural'],
    );
    $title_args_lower = array(
      '%title_singular' => strtolower($args['record_type_title']['singular']),
      '%title_plural' => strtolower($args['record_type_title']['plural']),
    );

    $form['linking_table'] = array(
      '#type' => 'hidden',
      '#value' => $linking_table,
    );

    // define the fieldsets
    $form['sync'] = array(
      '#type' => 'fieldset',
      '#title' => 'Sync ' . $args['record_type_title']['plural'],
      '#description' => '',
    );

    // An 'item' element displays its '#markup'.
    $form['sync']['description'] = array(
      '#type' => 'item',
      '#markup' => t("%title_plural of the types listed ".
        "below in the %title_singular Types box will be synced (leave blank to sync all types). You may limit the ".
        "%title_plural to be synced by a specific organism. Depending on the ".
        "number of %title_plural in the chado database this may take a long ".
        "time to complete. ",
        $title_args),
    );

    if (!empty($sync_filters['type_id'])) {
      $form['sync']['type_ids'] = array(
        '#title' => t('%title_singular Types', $title_args),
        '#type' => 'textarea',
        '#description' => t("Enter the names of the %title_singular types to sync. " .
          "Leave blank to sync all %title_plural. Separate each type with a comma ".
          "or new line. Pages for these %title_singular ".
          "types will be created automatically for %title_plural that exist in the ".
          "chado database. The names must match ".
          "exactly (spelling and case) with terms in the ontologies",
          $title_args_lower),
        // The element is called 'type_ids', so that is the submitted value to
        // restore when the form is rebuilt.
        '#default_value' => (isset($form_state['values']['type_ids'])) ? $form_state['values']['type_ids'] : '',
      );
    }

    // get the list of organisms
    if (!empty($sync_filters['organism_id'])) {
      $sql = "SELECT * FROM {organism} ORDER BY genus, species";
      $results = chado_query($sql);
      // Key 0 is the empty "no organism selected" option.
      $organisms = array(0 => '');
      foreach ($results as $organism) {
        $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
      }
      $form['sync']['organism_id'] = array(
        '#title' => t('Organism'),
        // Element types are machine names and must never be translated.
        '#type' => 'select',
        '#description' => t("Choose the organism for which %title_plural types set above will be synced.", $title_args),
        '#options' => $organisms,
        '#default_value' => (isset($form_state['values']['organism_id'])) ? $form_state['values']['organism_id'] : 0,
      );
    }

    // get the list of records that can be picked individually
    if (array_key_exists('checkboxes', $sync_filters)) {
      // get the base schema
      $base_table = $args['base_table'];
      $table_info = chado_get_schema($base_table);

      // if the base table does not have a primary key or has more than one then
      // we can't proceed, otherwise, generate the checkboxes
      if (array_key_exists('primary key', $table_info) and count($table_info['primary key']) == 1) {
        $pkey = $table_info['primary key'][0];
        $columns = $sync_filters['checkboxes'];
        $select_cols = implode("|| ' ' ||", $columns);

        // get non-synced records: base table rows without a linking table row
        $sql = "
          SELECT BT.$pkey as id, $select_cols as value
          FROM {" . $base_table . "} BT
            LEFT JOIN public.$linking_table LT ON LT.$pkey = BT.$pkey
          WHERE LT.$pkey IS NULL
          ORDER BY value ASC
        ";
        $results = chado_query($sql);
        $values = array();
        foreach ($results as $result) {
          $values[$result->id] = $result->value;
        }
        if (count($values) > 0) {
          $form['sync']['ids'] = array(
            '#title' => 'Available ' . $args['record_type_title']['plural'],
            '#type' => 'checkboxes',
            '#options' => $values,
            '#default_value' => (isset($form_state['values']['ids'])) ? $form_state['values']['ids'] : array(),
            '#suffix' => '</div><br>',
            '#prefix' => t("The following %title_plural have not been synced. Check those to be synced or leave all unchecked to sync them all.",
              $title_args_lower) . '<div style="height: 200px; overflow: scroll">',
          );
        }
        else {
          $form['sync']['no_ids'] = array(
            '#markup' => "<p>There are no " . strtolower($args['record_type_title']['plural']) . " to sync.</p>",
          );
        }
      }
    }
    // if we provide a list of checkboxes we shouldn't need a max_sync
    else {
      $form['sync']['max_sync'] = array(
        '#type' => 'textfield',
        '#title' => t('Maximum number of records to Sync'),
        '#description' => t('Leave this field empty to sync all records, regardless of number'),
        '#default_value' => (isset($form_state['values']['max_sync'])) ? $form_state['values']['max_sync'] : '',
      );
    }

    // The submit handler recognises this button by the literal 'Sync' prefix of
    // its label, so the label must not be run through t().
    $form['sync']['button'] = array(
      '#type' => 'submit',
      '#value' => 'Sync ' . $args['record_type_title']['plural'],
      '#weight' => 3,
    );

    $form['cleanup'] = array(
      '#type' => 'fieldset',
      '#title' => t('Clean Up'),
    );
    $form['cleanup']['description'] = array(
      '#markup' => t("<p>With Drupal and chado residing in different databases " .
        "it is possible that nodes in Drupal and " . strtolower($args['record_type_title']['plural']) . " in Chado become " .
        "\"orphaned\". This can occur if a node in Drupal is " .
        "deleted but the corresponding chado records is not and/or vice " .
        "versa. Click the button below to resolve these discrepancies.</p>"),
      '#weight' => -10,
    );
    $form['cleanup']['cleanup_batch_size'] = array(
      '#type' => 'textfield',
      '#title' => t('Batch Size'),
      '#description' => t('The number of records to analyze together in a batch. If you are having memory issues you might want to decrease this number.'),
      '#default_value' => variable_get('chado_node_api_cleanup_batch_size', 25000),
    );
    // Like the sync button, this label is matched literally on submit.
    $form['cleanup']['button'] = array(
      '#type' => 'submit',
      '#value' => 'Clean up orphaned ' . strtolower($args['record_type_title']['plural']),
      '#weight' => 2,
    );

    // Allow each module to alter this form as needed
    if (isset($args['hook_prefix'])) {
      $hook_form_alter = $args['hook_prefix'] . '_chado_node_sync_form';
      if (function_exists($hook_form_alter)) {
        $form = call_user_func($hook_form_alter, $form, $form_state);
      }
    }
    return $form;
  }
  /**
   * Generic Sync Form Validate.
   *
   * Normalizes the clean-up batch size in $form_state['values']:
   * - an empty or zero value is reset to the default of 25,000 with a warning;
   * - a non-numeric value raises a form error on the field;
   * - any other value is rounded to a positive whole number. If rounding
   *   produces zero (e.g. "0.4") the default is used instead.
   *
   * @param array $form
   *   The form array (not used).
   * @param array $form_state
   *   The form state; 'cleanup_batch_size' in its values may be rewritten.
   *
   * @ingroup tripal_core
   */
  function chado_node_sync_form_validate($form, &$form_state) {
    $default_batch_size = 25000;
    $reset_message = 'The Batch Size entered for Cleaning-up orphaned nodes was empty or 0. Since this is not valid, we reset it to the default of 25,000.';

    $batch_size = isset($form_state['values']['cleanup_batch_size']) ? $form_state['values']['cleanup_batch_size'] : '';
    if (is_string($batch_size)) {
      $batch_size = trim($batch_size);
    }

    if (empty($batch_size)) {
      $form_state['values']['cleanup_batch_size'] = $default_batch_size;
      drupal_set_message($reset_message, 'warning');
    }
    elseif (!is_numeric($batch_size)) {
      form_set_error('cleanup_batch_size', 'The batch size must be a positive whole number.');
    }
    else {
      // Round the value just to make sure it is a positive whole number.
      $rounded = (int) abs(round((float) $batch_size));
      if ($rounded < 1) {
        // Values such as "0.4" pass the checks above but round down to zero,
        // which is not a usable batch size.
        $rounded = $default_batch_size;
        drupal_set_message($reset_message, 'warning');
      }
      $form_state['values']['cleanup_batch_size'] = $rounded;
    }
  }
  /**
   * Generic Sync Form Submit.
   *
   * Depending on which button was pressed (recognised by the start of its
   * label), this adds either a 'chado_node_sync_records' job or a
   * 'chado_cleanup_orphaned_nodes' job through tripal_add_job(). A module can
   * take over the sync submission completely by implementing
   * [hook_prefix]_chado_node_sync_form_submit().
   *
   * @param array $form
   *   The form array; only passed on to a module's own submit implementation.
   * @param array $form_state
   *   The form state holding the submitted values and the 'chado_node_api'
   *   settings stored by chado_node_sync_form().
   *
   * @return
   *   Whatever the module's own submit implementation returns when one exists;
   *   otherwise nothing.
   *
   * @ingroup tripal_core
   */
  function chado_node_sync_form_submit($form, $form_state) {
    global $user;

    $values = $form_state['values'];
    $op = isset($values['op']) ? $values['op'] : '';

    if (preg_match('/^Sync/', $op)) {
      // get arguments
      $args = $form_state['chado_node_api'];
      $module = $args['hook_prefix'];
      $base_table = $args['base_table'];
      $linking_table = $values['linking_table'];

      // Allow each module to hijack the submit if needed
      $hook_form_hijack_submit = $args['hook_prefix'] . '_chado_node_sync_form_submit';
      if (function_exists($hook_form_hijack_submit)) {
        return call_user_func($hook_form_hijack_submit, $form, $form_state);
      }

      // Get the types separated into a consistent list: split on new lines or
      // commas, trim surrounding spacing and skip empty entries. Appending
      // keeps the list sequentially indexed even when entries are skipped.
      $types = array();
      if (isset($values['type_ids'])) {
        foreach (preg_split("/[,\n\r]+/", $values['type_ids']) as $type) {
          $type = trim($type);
          if ($type !== '') {
            $types[] = $type;
          }
        }
      }

      // Get the ids to be synced (only the checked checkboxes)
      $ids = array();
      if (array_key_exists('ids', $values) and is_array($values['ids'])) {
        foreach ($values['ids'] as $id => $selected) {
          if ($selected) {
            $ids[] = $id;
          }
        }
      }

      // get the organism to be synced
      $organism_id = FALSE;
      if (array_key_exists('organism_id', $values)) {
        $organism_id = $values['organism_id'];
      }

      // Job Arguments, in the parameter order of chado_node_sync_records()
      $job_args = array(
        'base_table' => $base_table,
        'max_sync' => (!empty($values['max_sync'])) ? $values['max_sync'] : FALSE,
        'organism_id' => $organism_id,
        'types' => $types,
        'ids' => $ids,
        'linking_table' => $linking_table,
      );

      $title = "Sync " . $args['record_type_title']['plural'];
      tripal_add_job($title, $module, 'chado_node_sync_records', $job_args, $user->uid);
    }

    if (preg_match('/^Clean up orphaned/', $op)) {
      $module = $form_state['chado_node_api']['hook_prefix'];
      $base_table = $form_state['chado_node_api']['base_table'];
      $job_args = array($base_table, $values['cleanup_batch_size']);
      // Remember the batch size so it becomes the default next time.
      variable_set('chado_node_api_cleanup_batch_size', $values['cleanup_batch_size']);
      tripal_add_job($op, $module, 'chado_cleanup_orphaned_nodes', $job_args, $user->uid);
    }
  }
  615. /**
  616. * Generic function for syncing records in Chado with Drupal nodes.
  617. *
  618. * @param $base_table
  619. * The name of the Chado table containing the record that should be synced
  620. * @param $max_sync
  621. * Optional: A numeric value to indicate the maximum number of records to sync.
  622. * @param $organism_id
  623. * Optional: Limit the list of records to be synced to only those that
  624. * are associated with this organism_id. If the record is not associated
  625. * with an organism then this field is not needed.
  626. * @param $types
  627. * Optional: Limit the list of records to be synced to only those that
  628. * match the types listed in this array.
  629. * @param $ids
  630. * Optional: Limit the list of records to be synced to only those whose
  631. * primary key value matches the ID provided in this array.
  632. * @param $linking_table
  633. * Optional: Tripal maintains "linking" tables in the Drupal schema
  634. * to link Drupal nodes with Chado records. By default these tables
  635. * are named as 'chado_' . $base_table. But if for some reason the
  636. * linking table is not named in this way then it can be provided by this
  637. * argument.
  638. * @param $job_id
  639. * Optional. Used by the Tripal Jobs system when running this function
  640. * as a job. It is not needed otherwise.
  641. *
  642. * @ingroup tripal_chado_node_api
  643. */
  644. function chado_node_sync_records($base_table, $max_sync = FALSE, $organism_id = FALSE,
  645. $types = array(), $ids = array(), $linking_table = FALSE, $job_id = NULL) {
  646. global $user;
  647. $base_table_id = $base_table . '_id';
  648. if (!$linking_table) {
  649. $linking_table = 'chado_' . $base_table;
  650. }
  651. print "\nSync'ing $base_table records. ";
  652. // START BUILDING QUERY TO GET ALL RECORD FROM BASE TABLE THAT MATCH
  653. $select = array("$base_table.*");
  654. $joins = array();
  655. $where_clauses = array();
  656. $where_args = array();
  657. // If types are supplied then handle them
  658. $restrictions = '';
  659. if (count($types) > 0) {
  660. $restrictions .= " Type(s): " . implode(', ',$types) . "\n";
  661. $select[] = 'cvterm.name as cvtname';
  662. $joins[] = "LEFT JOIN {cvterm} cvterm ON $base_table.type_id = cvterm.cvterm_id";
  663. foreach ($types as $type) {
  664. $sanitized_type = str_replace(' ','_',$type);
  665. $where_clauses['type'][] = "cvterm.name = :type_name_$sanitized_type";
  666. $where_args['type'][":type_name_$sanitized_type"] = $type;
  667. }
  668. }
  669. // if IDs have been supplied
  670. if ($ids) {
  671. $restrictions .= " Specific Records: " . count($ids) . " recored(s) specified.\n";
  672. foreach ($ids as $id) {
  673. $where_clauses['id'][] = "$base_table.$base_table_id = :id_$id";
  674. $where_args['id'][":id_$id"] = $id;
  675. }
  676. }
  677. // If Organism is supplied
  678. if ($organism_id) {
  679. $organism = chado_select_record('organism', array('*'), array('organism_id' => $organism_id));
  680. $restrictions .= " Organism: " . $organism[0]->genus . " " . $organism[0]->species . "\n";
  681. $select[] = 'organism.*';
  682. $joins[] = "LEFT JOIN {organism} organism ON organism.organism_id = $base_table.organism_id";
  683. $where_clauses['organism'][] = 'organism.organism_id = :organism_id';
  684. $where_args['organism'][':organism_id'] = $organism_id;
  685. }
  686. // Allow module to add to query
  687. $hook_query_alter = $linking_table . '_chado_node_sync_select_query';
  688. if (function_exists($hook_query_alter)) {
  689. $update = call_user_func($hook_query_alter, array(
  690. 'select' => $select,
  691. 'joins' => $joins,
  692. 'where_clauses' => $where_clauses,
  693. 'where_args' => $where_args,
  694. ));
  695. // Now add in any new changes
  696. if ($update and is_array($update)) {
  697. $select = $update['select'];
  698. $joins = $update['joins'];
  699. $where_clauses = $update['where_clauses'];
  700. $where_args = $update['where_args'];
  701. }
  702. }
  703. // Build Query, we do a left join on the chado_xxxx table in the Drupal schema
  704. // so that if no criteria are specified we only get those items that have not
  705. // yet been synced.
  706. $query = "
  707. SELECT " . implode(', ', $select) . ' ' .
  708. 'FROM {' . $base_table . '} ' . $base_table . ' ' . implode(' ', $joins) . ' '.
  709. " LEFT JOIN public.$linking_table CT ON CT.$base_table_id = $base_table.$base_table_id " .
  710. "WHERE CT.$base_table_id IS NULL ";
  711. // extend the where clause if needed
  712. $where = '';
  713. $sql_args = array();
  714. foreach ($where_clauses as $category => $items) {
  715. $where .= ' AND (';
  716. foreach ($items as $item) {
  717. $where .= $item . ' OR ';
  718. }
  719. $where = substr($where, 0, -4); // remove the trailing 'OR'
  720. $where .= ') ';
  721. $sql_args = array_merge($sql_args, $where_args[$category]);
  722. }
  723. if ($where) {
  724. $query .= $where;
  725. }
  726. $query .- " ORDER BY " . $base_table_id;
  727. // If Maximum number to Sync is supplied
  728. if ($max_sync) {
  729. $query .= " LIMIT $max_sync";
  730. $restrictions .= " Limited to $max_sync records.\n";
  731. }
  732. if ($restrictions) {
  733. print "Records matching these criteria will be synced: \n$restrictions";
  734. }
  735. else {
  736. print "\n";
  737. }
  738. // execute the query
  739. $results = chado_query($query, $sql_args);
  740. // Iterate through records that need to be synced
  741. $count = $results->rowCount();
  742. $interval = intval($count * 0.01);
  743. if ($interval < 1) {
  744. $interval = 1;
  745. }
  746. print "\n$count $base_table records found.\n";
  747. $i = 0;
  748. $transaction = db_transaction();
  749. print "\nNOTE: Syncing is performed using a database transaction. \n" .
  750. "If the sync fails or is terminated prematurely then the entire set of \n" .
  751. "synced items is rolled back and will not be found in the database\n\n";
  752. try {
  753. $percent = 0;
  754. foreach ($results as $record) {
  755. //print "\nLoading $base_table " . ($i + 1) . " of $count ($base_table_id=" . $record->{$base_table_id} . ")...";
  756. // update the job status every 1% features
  757. if ($job_id and $i % $interval == 0) {
  758. $percent = sprintf("%.2f", (($i + 1) / $count) * 100);
  759. print "Syncing $base_table " . ($i + 1) . " of $count (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  760. tripal_set_job_progress($job_id, intval(($i/$count)*100));
  761. }
  762. // Check if it is in the chado linking table (ie: check to see if it is already linked to a node)
  763. $result = db_select($linking_table, 'lnk')
  764. ->fields('lnk',array('nid'))
  765. ->condition($base_table_id, $record->{$base_table_id}, '=')
  766. ->execute()
  767. ->fetchObject();
  768. if (!empty($result)) {
  769. //print " Previously Sync'd";
  770. }
  771. else {
  772. // Create generic new node
  773. $new_node = new stdClass();
  774. $new_node->type = $linking_table;
  775. $new_node->uid = $user->uid;
  776. $new_node->{$base_table_id} = $record->{$base_table_id};
  777. $new_node->$base_table = $record;
  778. $new_node->language = LANGUAGE_NONE;
  779. // TODO: should we get rid of this hook and use hook_node_presave() instead?
  780. // allow base module to set additional fields as needed
  781. $hook_create_new_node = $linking_table . '_chado_node_sync_create_new_node';
  782. if (function_exists($hook_create_new_node)) {
  783. $new_node = call_user_func($hook_create_new_node, $new_node, $record);
  784. }
  785. // Validate and Save New Node
  786. $form = array();
  787. $form_state = array();
  788. node_validate($new_node, $form, $form_state);
  789. if (!form_get_errors()) {
  790. $node = node_submit($new_node);
  791. node_save($node);
  792. //print " Node Created (nid=".$node->nid.")";
  793. }
  794. else {
  795. watchdog('trp-fsync', "Failed to insert $base_table: %title", array('%title' => $new_node->title), WATCHDOG_ERROR);
  796. }
  797. }
  798. $i++;
  799. }
  800. print "\n\nComplete!\n";
  801. }
  802. catch (Exception $e) {
  803. $transaction->rollback();
  804. print "\n"; // make sure we start errors on new line
  805. watchdog_exception('trp-fsync', $e);
  806. print "FAILED: Rolling back database changes...\n";
  807. }
  808. }
  /**
   * Cleans up orphaned nodes and linking records for a chado node type.
   *
   * This function is a wrapper for chado_cleanup_orphaned_nodes_part(). It
   * breaks the cleanup into batches of $nentries records so that the work stays
   * manageable for servers that may have low PHP memory settings.
   *
   * @param $table
   *   The name of the table that corresponds to the node type we want to clean
   *   up. The node type and the linking table are both 'chado_' . $table.
   * @param $nentries
   *   The number of entries to parse at one time (ie: the batch size). Nothing
   *   is processed when this is less than 1.
   * @param $job_id
   *   This should be the job id from the Tripal jobs system. It is handed on to
   *   chado_cleanup_orphaned_nodes_part(), which updates the job status.
   *
   * @return
   *   Always an empty string.
   *
   * @ingroup tripal_chado_node_api
   */
  function chado_cleanup_orphaned_nodes($table, $nentries = 25000, $job_id = NULL) {
    // A batch size below 1 cannot be processed and would cause a division by
    // zero when the number of batches is calculated.
    $nentries = (int) $nentries;
    if ($nentries < 1) {
      return '';
    }

    // Find the number of nodes of type chado_$table and the number of entries
    // in the chado_$table linking table. fetchField() reads the first column of
    // the result, so it does not depend on how the driver names COUNT(*).
    $node_count = (int) db_query(
      "SELECT COUNT(*) FROM {node} WHERE type = :type",
      array(':type' => 'chado_' . $table)
    )->fetchField();
    $link_count = (int) db_query("SELECT COUNT(*) FROM {chado_" . $table . "}")->fetchField();

    // Keep the larger of the two numbers so that both sets are fully covered.
    $count = max($node_count, $link_count);

    $batches = (int) ceil($count / $nentries);
    for ($i = 0; $i < $batches; $i++) {
      // SQL OFFSET is zero-based: the first batch must start at offset 0,
      // otherwise the very first record is never examined.
      $offset = $nentries * $i;
      chado_cleanup_orphaned_nodes_part($table, $job_id, $nentries, $offset);
    }
    return '';
  }
  /**
   * Cleans up one batch of orphaned chado_[table] entries and Drupal nodes.
   *
   * This function will delete Drupal nodes for any sync'ed table (e.g.
   * feature, organism, analysis, stock, library) if the chado record has been
   * deleted or the entry in the chado_[table] table has been removed. It makes
   * two passes over the requested batch:
   *  - First, every chado_[table] entry in the batch is checked; entries whose
   *    node or whose chado record no longer exists are deleted.
   *  - Second, every node of type chado_[table] in the batch is checked; nodes
   *    without a chado_[table] entry are deleted when node_access() allows it.
   *
   * NOTE(review): both passes page through their tables with LIMIT/OFFSET while
   * also deleting rows from them, so rows may shift between successive batches.
   * Running the cleanup again is presumably needed to catch any that were
   * skipped -- confirm.
   *
   * @param $table
   *   The name of the table that corresponds to the node type we want to clean
   *   up.
   * @param $job_id
   *   This should be the job id from the Tripal jobs system. This function
   *   will update the job status using the provided job ID.
   * @param $nentries
   *   The maximum number of records to examine in each pass (the batch size).
   * @param $offset
   *   The zero-based offset of the first record of the batch.
   *
   * @return
   *   Always an empty string.
   *
   * @ingroup tripal_chado_node_api
   */
  function chado_cleanup_orphaned_nodes_part($table, $job_id = NULL, $nentries = 25000, $offset = 0) {
    // Change this variable to TRUE to print debugging values for memory leaks.
    $debug_memory_leak = FALSE;

    // LIMIT and OFFSET are interpolated directly into the SQL below, so force
    // them to integers first.
    $nentries = (int) $nentries;
    $offset = (int) $offset;

    $node_type = 'chado_' . $table;
    $table_id = $table . "_id";

    // Build the SQL statements needed to check that nodes, chado_$table entries
    // and chado records all still point at each other.
    $dsql = "SELECT * FROM {node} WHERE type = :type ORDER BY nid LIMIT $nentries OFFSET $offset";
    $nsql = "SELECT * FROM {node} WHERE nid = :nid";
    $csql = "SELECT * FROM {chado_" . $table . "} WHERE nid = :nid ";
    $clsql = "SELECT * FROM {chado_" . $table . "} ORDER BY nid LIMIT $nentries OFFSET $offset";
    $lsql = "SELECT * FROM {" . $table . "} where " . $table . "_id = :" . $table . "_id ";

    // Load this batch of chado_$table entries into an array.
    print "Getting chado_$table\n";
    $cnodes = array();
    $count = 0;
    $res = db_query($clsql);
    foreach ($res as $node) {
      $cnodes[$count] = $node;
      $count++;
    }

    // Free $res.
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $res = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing res: " . ($mu - memory_get_usage()) ." bytes\n";
    }

    // Report progress roughly every 1% of the entries in this pass.
    $interval = intval($count * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }

    // Iterate through all of the chado_$table entries and remove those
    // that don't have a node or don't have a $table record in chado.
    print "Verifying all chado_$table Entries\n";
    $deleted = 0;
    $i = 0;
    foreach ($cnodes as $linker) {
      if ($job_id and $i % $interval == 0) {
        tripal_set_job_progress($job_id, intval(($i / $count) * 100));
      }

      // An entry can be missing both its node and its chado record; track that
      // so it is only counted once in the summary.
      $orphaned = FALSE;

      // See if the node exists, if not remove the entry from the chado_$table
      // table.
      $results = db_query($nsql, array(':nid' => $linker->nid));
      $node = $results->fetchObject();
      if (!$node) {
        $orphaned = TRUE;
        db_query("DELETE FROM {chado_" . $table . "} WHERE nid = :nid", array(':nid' => $linker->nid));
        $message = "chado_$table missing node.... DELETING: $linker->nid";
        watchdog('tripal_core', $message, array(), WATCHDOG_WARNING);
      }

      // See if the record in chado exists, if not remove the entry from the
      // chado_$table table.
      $results = chado_query($lsql, array(":" . $table . "_id" => $linker->$table_id));
      $record = $results->fetchObject();
      if (!$record) {
        $orphaned = TRUE;
        $sql = "DELETE FROM {chado_" . $table . "} WHERE " . $table . "_id = :" . $table . "_id";
        db_query($sql, array(":" . $table . "_id" => $linker->$table_id));
        $message = "chado_$table missing $table.... DELETING entry.";
        watchdog('tripal_core', $message, array(), WATCHDOG_NOTICE);
      }

      if ($orphaned) {
        $deleted++;
      }
      $i++;
    }

    // Freeing up various resources: $cnodes, $node, $record, and $results.
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $cnodes = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing cnodes: " . ($mu - memory_get_usage()) ." bytes\n";
    }
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $results = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing results: " . ($mu - memory_get_usage()) ." bytes\n";
    }
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $node = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing node: " . ($mu - memory_get_usage()) ." bytes\n";
    }
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $record = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing record: " . ($mu - memory_get_usage()) ." bytes\n";
    }
    print "\t$deleted chado_$table entries missing either a node or chado entry.\n";

    // Iterate through all of the nodes and delete those that don't
    // have a corresponding entry in chado_$table.
    $deleted = 0;

    // Load this batch of nodes into an array. The counters are restarted so
    // that progress for this pass is measured against this pass only.
    print "Getting nodes\n";
    $nodes = array();
    $count = 0;
    $res = db_query($dsql, array(':type' => $node_type));
    foreach ($res as $node) {
      $nodes[$count] = $node;
      $count++;
    }

    // Free $res.
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $res = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing res:\n" . ($mu - memory_get_usage()) ." bytes\n";
    }

    $interval = intval($count * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }
    $i = 0;
    foreach ($nodes as $node) {
      if ($job_id and $i % $interval == 0) {
        tripal_set_job_progress($job_id, intval(($i / $count) * 100));
      }

      // Check to see if the node has a corresponding entry
      // in the chado_$table table. If not then delete the node.
      $results = db_query($csql, array(":nid" => $node->nid));
      $link = $results->fetchObject();
      if (!$link) {
        if (node_access('delete', $node)) {
          $deleted++;
          $message = "Node missing in chado_$table table.... DELETING node $node->nid";
          watchdog("tripal_core", $message, array(), WATCHDOG_NOTICE);
          node_delete($node->nid);
        }
        else {
          $message = "Node missing in chado_$table table.... but cannot delete due to improper permissions (node $node->nid)";
          watchdog("tripal_core", $message, array(), WATCHDOG_WARNING);
        }
      }
      $i++;
    }

    // Freeing up various resources: $results, $link and $nodes.
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $results = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing results: " . ($mu - memory_get_usage()) . " bytes\n";
    }
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $link = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing link: " . ($mu - memory_get_usage()) . " bytes\n";
    }
    $mu = ($debug_memory_leak) ? memory_get_usage() : 0;
    $nodes = NULL;
    if ($debug_memory_leak) {
      print "\tFreeing nodes: " . ($mu - memory_get_usage()) ." bytes\n";
    }
    print "\t$deleted nodes did not have corresponding chado_$table entries.\n";
    return '';
  }
  /**
   * Populate a new node during sync (example implementation).
   *
   * Note: For your own module, replace hook in the function name with the
   * machine-name of your chado node type (ie: chado_feature).
   *
   * @param $new_node
   *   A basic new node object.
   * @param $record
   *   The record object from chado specifying the biological data for this
   *   node.
   *
   * @return
   *   The node object passed in, with the fields needed to create a new node
   *   during sync added to it.
   *
   * @ingroup tripal_chado_node_api
   */
  function hook_chado_node_sync_create_new_node($new_node, $record) {
    // Only the chado values needed while the node is being created (for
    // example, those used to generate the title) have to be copied here. This
    // example copies the record's uniquename.
    $unique_name = $record->uniquename;
    $new_node->uniquename = $unique_name;

    return $new_node;
  }
  /**
   * Alter the sync form (optional).
   *
   * Implement this if you need additional filtering options for choosing which
   * chado records to sync, or if you simply want to customize the help text
   * provided by the form.
   *
   * Note: For your own module, replace hook in the function name with the
   * machine-name of your chado node type (ie: chado_feature).
   *
   * @param $form
   *   The sync form array to change or add to.
   * @param $form_state
   *   The form state array, passed by reference.
   *
   * @return
   *   The form array. This example returns it unchanged.
   *
   * @ingroup tripal_chado_node_api
   */
  function hook_chado_node_sync_form($form, &$form_state) {
    // Make any changes or additions here, following the Drupal Form API.
    $altered_form = $form;

    return $altered_form;
  }
  /**
   * Bypass the chado node api sync form submit (optional).
   *
   * Implementing this lets you use your own function as the submit handler.
   * That might be necessary if you want to add additional arguments to the
   * tripal job, or to call your own sync'ing function if the generic
   * chado_node_sync_records() is not sufficient.
   *
   * Note: For your own module, replace hook in the function name with the
   * machine-name of your chado node type (ie: chado_feature).
   *
   * This example is a template: $base_table, $max_sync, $organism_id, $types,
   * $title and $module are placeholders that are not defined in this function
   * and must be set by your implementation.
   *
   * @param $form
   *   The submitted form array.
   * @param $form_state
   *   The form state array.
   *
   * @ingroup tripal_chado_node_api
   */
  function hook_chado_node_sync_form_submit($form, $form_state) {
    global $user;

    // The arguments handed to the sync function, in order:
    //  - the base chado table (ie: feature)
    //  - the maximum number of records to sync or FALSE for sync all that match
    //  - the organism_id to restrict records to or FALSE if not to restrict by
    //    organism_id
    //  - a string with the cvterm.name of the types to restrict to separated
    //    by |||
    $job_args = array($base_table, $max_sync, $organism_id, $types);

    // Register a tripal job with: a descriptive title, the name of your
    // module, the chado node api sync function, the arguments for that
    // function, and the user who submitted the job.
    tripal_add_job($title, $module, 'chado_node_sync_records', $job_args, $user->uid);
  }
  /**
   * Alter the query that retrieves records to be sync'd (optional).
   *
   * This might be necessary if you need fields from other chado tables to
   * create your node, or if your chado node type only supports a subset of a
   * given table (ie: a germplasm node type might only support node creation for
   * certain types of stock records, in which case you would need to filter the
   * results to only those types).
   *
   * Note: For your own module, replace hook in the function name with the
   * machine-name of your chado node type (ie: chado_feature).
   *
   * @param $query
   *   An array containing the following:
   *    - 'select': An array of select clauses.
   *    - 'joins': An array of joins (ie: a single join could be
   *      'LEFT JOIN {chadotable} alias ON base.id=alias.id').
   *    - 'where_clauses': An array of where clauses grouped by category. The
   *      clauses within a category are OR'ed together and the categories are
   *      AND'ed together. Use :placeholders for values.
   *    - 'where_args': An associative array, grouped by the same categories,
   *      that maps each placeholder to the value to be substituted into the
   *      where clause.
   *
   * @return
   *   The updated $query array.
   *
   * @ingroup tripal_chado_node_api
   */
  function hook_chado_node_sync_select_query($query) {
    // A category is a unique id for a group of where clauses that will be
    // concatenated together via an SQL 'OR'. The array may already contain
    // categories (for example 'id' and 'organism'); either add your own unique
    // category or alter the contents of an existing one. Check that a new
    // category name is not already present in $query['where_clauses'].
    $category = 'my_category';

    // Extra fields to select. Prefix each field with its table name.
    $query['select'][] = 'example.myfavfield';

    // Extra joins. Wrap the table name in curly brackets.
    $query['joins'][] = 'LEFT JOIN {exampleprop} PROP ON PROP.example_id=EXAMPLE.example_id';

    // Extra where clauses and their arguments. Prefix the field with its table
    // name, and keep each placeholder unique across all categories (perhaps
    // by adding a unique prefix/suffix).
    $query['where_clauses'][$category][] = 'example.myfavfield = :favvalue';
    $query['where_args'][$category][':favvalue'] = 'awesome-ness';

    // The updated query must be returned.
    return $query;
  }