tripal_feature.sync_features.inc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. <?php
  2. /**
  3. * @file
  4. * @todo Add file header description
  5. */
  6. /**
  7. *
  8. */
  9. function tripal_feature_sync_form() {
  10. $form['description'] = array(
  11. '#markup' => t("Add feature types, optionally select an organism and " .
  12. "click the 'Sync all Features' button to create Drupal " .
  13. "content for features in chado. Only features of the types listed " .
  14. "below in the Feature Types box will be synced. You may limit the " .
  15. "features to be synced by a specific organism. Depending on the " .
  16. "number of features in the chado database this may take a long " .
  17. "time to complete. "),
  18. );
  19. $form['feature_types'] = array(
  20. '#title' => t('Feature Types'),
  21. '#type' => 'textarea',
  22. '#description' => t("Enter the names of the feature types to sync. Pages for these feature " .
  23. "types will be created automatically for features that exist in the " .
  24. "chado database. The names listed here should be spearated by " .
  25. "spaces or entered separately on new lines. The names must match " .
  26. "exactly (spelling and case) with terms in the sequence ontology"),
  27. '#required' => TRUE,
  28. '#default_value' => variable_get('chado_sync_feature_types', 'gene mRNA'),
  29. );
  30. // get the list of organisms
  31. $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  32. $orgs = tripal_organism_get_synced();
  33. $organisms[] = '';
  34. foreach ($orgs as $organism) {
  35. $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  36. }
  37. $form['organism_id'] = array(
  38. '#title' => t('Organism'),
  39. '#type' => t('select'),
  40. '#description' => t("Choose the organism for which features types set above will be synced. Only organisms which also have been synced will appear in this list."),
  41. '#options' => $organisms,
  42. );
  43. $form['button'] = array(
  44. '#type' => 'submit',
  45. '#value' => t('Sync all Features'),
  46. '#weight' => 3,
  47. );
  48. get_tripal_feature_admin_form_cleanup_set($form);
  49. return $form;
  50. }
  51. /**
  52. *
  53. *
  54. * @ingroup tripal_feature
  55. */
  56. function get_tripal_feature_admin_form_cleanup_set(&$form) {
  57. $form['cleanup'] = array(
  58. '#type' => 'fieldset',
  59. '#title' => t('Clean Up'),
  60. '#collapsible' => FALSE,
  61. '#collapsed' => FALSE,
  62. );
  63. $form['cleanup']['description'] = array(
  64. '#markup' => t("With Drupal and chado residing in different databases " .
  65. "it is possible that nodes in Drupal and features in Chado become " .
  66. "\"orphaned\". This can occur if a feature node in Drupal is " .
  67. "deleted but the corresponding chado feature is not and/or vice " .
  68. "versa. The Cleanup function will also remove nodes for features " .
  69. "that are not in the list of allowed feature types as specified " .
  70. "above. This is helpful when a feature type needs to be " .
  71. "removed but was previously present as Drupal nodes. " .
  72. "Click the button below to resolve these discrepancies."),
  73. '#weight' => 1,
  74. );
  75. $form['cleanup']['button'] = array(
  76. '#type' => 'submit',
  77. '#value' => t('Clean up orphaned features'),
  78. '#weight' => 2,
  79. );
  80. }
  81. /**
  82. *
  83. */
  84. function tripal_feature_sync_form_validate($form, &$form_state) {
  85. $organism_id = $form_state['values']['organism_id'];
  86. $feature_types = $form_state['values']['feature_types'];
  87. }
  88. /**
  89. *
  90. */
  91. function tripal_feature_sync_form_submit($form, &$form_state) {
  92. global $user;
  93. $organism_id = $form_state['values']['organism_id'];
  94. $feature_types = $form_state['values']['feature_types'];
  95. switch ($form_state['values']['op']) {
  96. case t('Sync all Features') :
  97. $job_args = array(0, $organism_id, $feature_types);
  98. if ($organism_id) {
  99. $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
  100. $title = "Sync all features for " . $organism[0]->genus . " " . $organism[0]->species;
  101. }
  102. else {
  103. $title = t('Sync all features for all synced organisms');
  104. }
  105. variable_set('chado_sync_feature_types', $feature_types);
  106. tripal_add_job($title, 'tripal_feature', 'tripal_feature_sync_features', $job_args, $user->uid);
  107. break;
  108. case t('Clean up orphaned features') :
  109. $job_args = array();
  110. tripal_add_job('Cleanup orphaned features', 'tripal_feature',
  111. 'tripal_features_cleanup', $job_args, $user->uid);
  112. break;
  113. }
  114. }
  115. /**
  116. *
  117. * @param $na
  118. * Tripal expects all jobs to have at least one argument. For this function
  119. * we don't need any, so we have this dummy argument as a filler
  120. * @param $job_id
  121. */
  122. function tripal_feature_set_urls($na = NULL, $job = NULL) {
  123. // begin the transaction
  124. db_query("BEGIN");
  125. print "\nNOTE: Setting of URLs is performed using a database transaction. \n" .
  126. "If the load fails or is terminated prematurely then the entire set of \n" .
  127. "new URLs will be rolled back and no changes will be made\n\n";
  128. // get the number of records we need to set URLs for
  129. $csql = "SELECT count(*) FROM {chado_feature}";
  130. $num_nodes = db_query($csql)->fetchField();
  131. // calculate the interval at which we will print an update on the screen
  132. $num_set = 0;
  133. $num_per_interval = 100;
  134. // prepate the statements which will quickly add url alias. Because these
  135. // are not Chado tables we must manually prepare them
  136. $psql = "
  137. PREPARE del_url_alias_by_src (text) AS
  138. DELETE FROM {url_alias} WHERE source = \$1
  139. ";
  140. db_query($psql);
  141. $psql = "
  142. PREPARE ins_url_alias_nisrds (text, text) AS
  143. INSERT INTO url_alias (source, alias) VALUES (\$1, \$2)
  144. ";
  145. db_query($psql);
  146. // get the URL alias syntax string
  147. $url_alias = variable_get('chado_feature_url_string', '/feature/[genus]/[species]/[type]/[uniquename]');
  148. if (!$url_alias) {
  149. $url_alias = '/feature/[genus]/[species]/[type]/[uniquename]';
  150. }
  151. $url_alias = preg_replace('/^\//', '', $url_alias); // remove any preceeding forward slash
  152. // get the list of features that have been synced
  153. $sql = "SELECT * FROM {chado_feature}";
  154. $nodes = db_query($sql);
  155. while ($node = $nodes->fetchObject()) {
  156. // get the URL alias
  157. $src = "node/$node->nid";
  158. $dst = tripal_feature_get_feature_url($node, $url_alias);
  159. if (!$dst) {
  160. db_query('DEALLOCATE "del_url_alias_by_src"');
  161. db_query('DEALLOCATE "ins_url_alias_nisrds"');
  162. db_query("ROLLBACK");
  163. return;
  164. }
  165. // if the src and dst is the same (the URL alias couldn't be set)
  166. // then skip to the next one. There's nothing we can do about this one.
  167. if ($src == $dst) {
  168. continue;
  169. }
  170. // remove any previous alias and then add the new one
  171. $success = db_query("EXECUTE del_url_alias_by_src(:source)", array(':source' => $src));
  172. if (!$success) {
  173. db_query('DEALLOCATE "del_url_alias_by_src"');
  174. db_query('DEALLOCATE "ins_url_alias_nisrds"');
  175. db_query("ROLLBACK");
  176. watchdog('trp-seturl', "Failed Removing URL Alias: %source", array('%source' => $src), WATCHDOG_ERROR);
  177. return;
  178. }
  179. $success = db_query("EXECUTE ins_url_alias_nisrds(:source, :alias)", array(':source' => $src, ':alias' => $dst));
  180. if (!$success) {
  181. db_query('DEALLOCATE "del_url_alias_by_src"');
  182. db_query('DEALLOCATE "ins_url_alias_nisrds"');
  183. db_query("ROLLBACK");
  184. watchdog('trp-seturl', "Failed Adding URL Alias: %alias", array('%alias' => $dst), WATCHDOG_ERROR);
  185. return;
  186. }
  187. // update the job status every 1% features
  188. if ($job and $num_set % $num_per_interval == 0) {
  189. $percent = ($num_set / $num_nodes) * 100;
  190. tripal_job_set_progress($job, intval($percent));
  191. $percent = sprintf("%.2f", $percent);
  192. print "Setting URLs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  193. }
  194. $num_set++;
  195. }
  196. $percent = ($num_set / $num_nodes) * 100;
  197. tripal_job_set_progress($job, intval($percent));
  198. $percent = sprintf("%.2f", $percent);
  199. print "Setting URLs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  200. print "\nDone. Set " . number_format($num_set) . " URLs\n";
  201. // unprepare the statements
  202. db_query('DEALLOCATE "del_url_alias_by_src"');
  203. db_query('DEALLOCATE "ins_url_alias_nisrds"');
  204. db_query("COMMIT");
  205. }
  206. /**
  207. *
  208. * @param $node
  209. * A node object containing at least the feature_id and nid
  210. * @param $url_alias
  211. * Optional. This should be the URL alias syntax string that contains
  212. * placeholders such as [id], [genus], [species], [name], [uniquename],
  213. * and [type]. These placeholders will be substituted for actual values.
  214. * If this parameter is not provided then the value of the
  215. * chado_feature_url_string Drupal variable will be used.
  216. */
  217. function tripal_feature_get_feature_url($node, $url_alias = NULL) {
  218. // get the starting URL alias
  219. if (!$url_alias) {
  220. $url_alias = variable_get('chado_feature_url_string', '/feature/[genus]/[species]/[type]/[uniquename]');
  221. if (!$url_alias) {
  222. $url_alias = '/feature/[genus]/[species]/[type]/[uniquename]';
  223. }
  224. $url_alias = preg_replace('/^\//', '', $url_alias); // remove any preceeding forward slash
  225. }
  226. // get the feature
  227. $values = array('feature_id' => $node->feature_id);
  228. $options = array('statement_name' => 'sel_feature_id');
  229. $feature = tripal_core_chado_select('feature', array('*'), $values, $options);
  230. if (!$feature) {
  231. watchdog('trp-seturl', "Cannot find feature when setting URL alias for feature: %id", array('%id' => $node->feature_id), WATCHDOG_ERROR);
  232. return FALSE;
  233. }
  234. $feature = (object) $feature[0];
  235. // get the organism
  236. $values = array('organism_id' => $feature->organism_id);
  237. $options = array('statement_name' => 'sel_organism_id');
  238. $organism = tripal_core_chado_select('organism', array('*'), $values, $options);
  239. if (!$organism) {
  240. watchdog('trp-seturl', "Cannot find organism when setting URL alias for feature: %id", array('%id' => $node->feature_id), WATCHDOG_ERROR);
  241. return FALSE;
  242. }
  243. $genus = preg_replace('/\s/', '_', strtolower($organism[0]->genus));
  244. $species = preg_replace('/\s/', '_', strtolower($organism[0]->species));
  245. // get the type
  246. $values = array('cvterm_id' => $feature->type_id);
  247. $options = array('statement_name' => 'sel_cvterm_id');
  248. $cvterm = tripal_core_chado_select('cvterm', array('name'), $values, $options);
  249. if (!$cvterm) {
  250. watchdog('trp-seturl', "Cannot find type when setting URL alias for feature: %id", array('%id' => $node->feature_id), WATCHDOG_ERROR);
  251. return FALSE;
  252. }
  253. $type = preg_replace('/\s/', '_', $cvterm[0]->name);
  254. // now substitute in the values
  255. $url_alias = preg_replace('/\[id\]/', $feature->feature_id, $url_alias);
  256. $url_alias = preg_replace('/\[genus\]/', $genus, $url_alias);
  257. $url_alias = preg_replace('/\[species\]/', $species, $url_alias);
  258. $url_alias = preg_replace('/\[type\]/', $type, $url_alias);
  259. $url_alias = preg_replace('/\[name\]/', $feature->name, $url_alias);
  260. $url_alias = preg_replace('/\[uniquename\]/', $feature->uniquename, $url_alias);
  261. // the dst field of the url_alias table is only 128 characters long.
  262. // if this is the case then simply return the node URL, we can't set this one
  263. if (strlen($url_alias) > 128) {
  264. watchdog('trp-seturl', "Cannot set alias longer than 128 characters: %alias.", array('%alias' => $url_alias), WATCHDOG_ERROR);
  265. return "node/" . $node->nid;
  266. }
  267. return $url_alias;
  268. }
  269. /**
  270. *
  271. *
  272. * @ingroup tripal_feature
  273. */
  274. function tripal_feature_sync_features($max_sync = 0, $organism_id = NULL,
  275. $feature_types = NULL, $job_id = NULL) {
  276. global $user;
  277. // get the list of available sequence ontology terms for which
  278. // we will build drupal pages from features in chado. If a feature
  279. // is not one of the specified typse we won't build a node for it.
  280. if (!$feature_types) {
  281. $allowed_types = variable_get('chado_sync_feature_types', 'gene mRNA');
  282. }
  283. else {
  284. $allowed_types = $feature_types;
  285. }
  286. $allowed_types = preg_replace("/[\s\n\r]+/", " ", $allowed_types);
  287. print "Looking for features of type: $allowed_types\n";
  288. $so_terms = split(' ', $allowed_types);
  289. $where_cvt = "";
  290. $args = array();
  291. $i = 0;
  292. foreach ($so_terms as $term) {
  293. $where_cvt .= "CVT.name = :term$i OR ";
  294. $args[":term$i"] = $term;
  295. $i++;
  296. }
  297. $where_cvt = drupal_substr($where_cvt, 0, drupal_strlen($where_cvt)-3); # strip trailing 'OR'
  298. // get the list of organisms that are synced and only include features from
  299. // those organisms
  300. $orgs = tripal_organism_get_synced();
  301. $where_org = "";
  302. $i = 0;
  303. foreach ($orgs as $org) {
  304. if ($organism_id) {
  305. if ($org->organism_id and $org->organism_id == $organism_id) {
  306. $where_org .= "F.organism_id = :org_id$i OR ";
  307. $args[":org_id$i"] = $org->organism_id;
  308. }
  309. }
  310. else {
  311. if ($org->organism_id) {
  312. $where_org .= "F.organism_id = :org_id$i OR ";
  313. $args[":org_id$i"] = $org->organism_id;
  314. }
  315. }
  316. $i++;
  317. }
  318. $where_org = drupal_substr($where_org, 0, drupal_strlen($where_org)-3); # strip trailing 'OR'
  319. // get the list of features that we will sync
  320. $sql = "
  321. SELECT F.*, CVT.name as cvtname, O.genus, O.species
  322. FROM {feature} F
  323. INNER JOIN {cvterm} CVT ON F.type_id = CVT.cvterm_id
  324. INNER JOIN {cv} CV ON CV.cv_id = CVT.cv_id
  325. INNER JOIN {organism} O ON O.organism_id = F.organism_id
  326. LEFT JOIN public.chado_feature CF ON CF.feature_id = F.feature_ID
  327. WHERE
  328. ($where_cvt) AND ($where_org) AND CV.name = 'sequence' AND
  329. CF.feature_id IS NULL
  330. ORDER BY feature_id";
  331. print_r($sql);
  332. print_r($args);
  333. $results = chado_query($sql, $args);
  334. // Iterate through features that need to be synced
  335. $count = $results->rowCount();
  336. $interval = intval($count * 0.01);
  337. if ($interval < 1) {
  338. $interval = 1;
  339. }
  340. $i = 0;
  341. $transaction = db_transaction();
  342. try {
  343. // tripal_feature_set_vocabulary();
  344. print "Loading feature $i of $count (0.00%). Memory: " . number_format(memory_get_usage()) . " bytes\r";
  345. foreach ($results as $feature) {
  346. // update the job status every 1% features
  347. if ($job_id and $i % $interval == 0) {
  348. $percent = sprintf("%.2f", ($i / $count) * 100);
  349. print "Parsing Line $line_num (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  350. tripal_job_set_progress($job_id, intval(($i/$count)*100));
  351. }
  352. $new_node = new stdClass();
  353. $new_node->type = 'chado_feature';
  354. $new_node->uid = $user->uid;
  355. $new_node->feature_id = $feature->feature_id;
  356. // set these values as they are needed for constructing the title and
  357. // the match the names of the fields in the feature form
  358. $new_node->organism_id = $feature->organism_id;
  359. $new_node->fname = $feature->name;
  360. $new_node->uniquename = $feature->uniquename;
  361. $new_node->feature_type = $feature->cvtname;
  362. $form = array(); // dummy variable
  363. $form_state = array(); // dummy variable
  364. node_validate($new_node, $form, $form_state);
  365. if (!form_get_errors()) {
  366. $node = node_submit($new_node);
  367. node_save($node);
  368. }
  369. else {
  370. watchdog('tfeature_sync', "Unable to create feature node. ID: %feature_id, Name: %name.",
  371. array('%feature_id' => $feature->feature_id, '%name' => $feature->uniquename), WATCHDOG_WARNING);
  372. }
  373. $i++;
  374. }
  375. }
  376. catch (Exception $e) {
  377. print "\n"; // make sure we start errors on new line
  378. watchdog_exception('trp-fsync', $e);
  379. $transaction->rollback();
  380. print "FAILED: Rolling back database changes...\n";
  381. }
  382. }
  383. /**
  384. *
  385. * Remove orphaned drupal nodes
  386. *
  387. * @param $dummy
  388. * Not Used -kept for backwards compatibility
  389. * @param $job_id
  390. * The id of the tripal job executing this function
  391. *
  392. * @ingroup tripal_feature
  393. */
  394. function tripal_features_cleanup($dummy = NULL, $job_id = NULL) {
  395. return tripal_core_chado_node_cleanup_orphaned('feature', $job_id);
  396. }