syncFeatures.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. <?php
  2. /**
  3. * @file
  4. * @todo Add file header description
  5. */
  // Stand-alone entry point: when this file is run from the command line with
  // an -f option it bootstraps Drupal and syncs features from chado to drupal.
  //   -f <feature_id>  syncs the feature with that feature_id
  //   -f 0             syncs all features
  $arguments = getopt("f:");
  if (isset($arguments['f'])) {
    // Fill in the $_SERVER entries that are assigned before Drupal is
    // bootstrapped (there is no web request when run from the command line).
    $drupal_base_url = parse_url('http://www.example.com');
    $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
    $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
    $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'];
    $_SERVER['REMOTE_ADDR'] = NULL;
    $_SERVER['REQUEST_METHOD'] = NULL;

    // The relative path means the script has to be started from the directory
    // that contains Drupal's includes/ folder.
    require_once 'includes/bootstrap.inc';
    drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

    $feature_id = $arguments['f'];
    if (!($feature_id > 0)) {
      // no positive feature_id was given so everything gets synced
      print "syncing all features...\n";
      tripal_feature_sync_features();
    }
    else {
      tripal_feature_sync_feature($feature_id);
    }
  }
  /**
   * Builds the form used to request a sync of chado features to Drupal.
   *
   * The form holds a help text, a textarea with the feature types to sync
   * (defaulting to the 'chado_sync_feature_types' variable), an optional
   * organism selector filled from tripal_organism_get_synced() and a submit
   * button.
   *
   * @return
   *   The form array.
   */
  function tripal_feature_sync_form() {
    $form = array();

    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Add feature types, optionally select an organism and ".
        "click the 'Sync all Features' button to create Drupal ".
        "content for features in chado. Only features of the types listed ".
        "below in the Feature Types box will be synced. You may limit the ".
        "features to be synced by a specific organism. Depending on the ".
        "number of features in the chado database this may take a long ".
        "time to complete. "),
    );

    // The help text is built from consistently double-quoted fragments so
    // that no stray quote characters or line breaks end up in the output.
    $form['feature_types'] = array(
      '#title' => t('Feature Types'),
      '#type' => 'textarea',
      '#description' => t("Enter the names of the sequence types that the ".
        "site will support with independent pages. Pages for these data ".
        "types will be built automatically for features that exist in the ".
        "chado database. The names listed here should be separated by ".
        "spaces or entered separately on new lines. The names must match ".
        "exactly (spelling and case) with terms in the sequence ontology"),
      '#required' => TRUE,
      '#default_value' => variable_get('chado_sync_feature_types', 'gene contig'),
    );

    // get the list of organisms; the first (empty) option means "no organism"
    $orgs = tripal_organism_get_synced();
    $organisms = array();
    $organisms[] = '';
    foreach ($orgs as $organism) {
      $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
    }

    $form['organism_id'] = array(
      '#title' => t('Organism'),
      // element types are identifiers and must not be passed through t()
      '#type' => 'select',
      '#description' => t("Choose the organism for which features will be synced."),
      '#options' => $organisms,
    );

    $form['button'] = array(
      '#type' => 'submit',
      '#value' => t('Sync all Features'),
      '#weight' => 3,
    );

    return $form;
  }
  /**
   * Validate handler for the feature sync form.
   *
   * There is currently nothing to validate, so the submitted values are left
   * untouched and no form errors are set.
   *
   * @param $form
   *   The form array (unused).
   * @param $form_state
   *   The form state, passed by reference (unused).
   */
  function tripal_feature_sync_form_validate($form, &$form_state) {
    // nothing to do
  }
  /**
   * Submit handler for the feature sync form.
   *
   * Stores the submitted feature types in the 'chado_sync_feature_types'
   * variable and adds a job that calls tripal_feature_sync_features() for the
   * selected organism (or for all synced organisms when none was selected).
   *
   * @param $form
   *   The form array (unused).
   * @param $form_state
   *   The form state; 'organism_id' and 'feature_types' are read from its
   *   'values'.
   */
  function tripal_feature_sync_form_submit($form, &$form_state) {
    global $user;

    $organism_id = $form_state['values']['organism_id'];
    $feature_types = $form_state['values']['feature_types'];

    // arguments for tripal_feature_sync_features(): $max_sync, $organism_id
    // and $feature_types
    $job_args = array(0, $organism_id, $feature_types);

    if ($organism_id) {
      $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
      if (!empty($organism[0])) {
        $title = "Sync all features for " . $organism[0]->genus . " " . $organism[0]->species;
      }
      else {
        // the organism could not be looked up; still give the job a usable title
        $title = "Sync all features for organism " . $organism_id;
      }
    }
    else {
      $title = t('Sync all features for all synced organisms');
    }

    variable_set('chado_sync_feature_types', $feature_types);
    tripal_add_job($title, 'tripal_feature',
      'tripal_feature_sync_features', $job_args, $user->uid);
  }
  /**
   * Sets the URL alias for every feature listed in the chado_feature table.
   *
   * @param $job_id
   *   Optional job id. It is accepted but not used by this function.
   */
  function tripal_feature_set_urls($job_id = NULL) {
    // first get the list of features that have been synced
    $nodes = db_query("SELECT * FROM {chado_feature}");

    while ($node = db_fetch_object($nodes)) {
      // now get the feature details
      $feature_arr = tripal_core_chado_select('feature',
        array('feature_id', 'name', 'uniquename'),
        array('feature_id' => $node->feature_id));

      // skip entries whose feature could not be found in chado, otherwise an
      // alias would be built from an empty feature
      if (empty($feature_arr[0])) {
        continue;
      }

      tripal_feature_set_feature_url($node, $feature_arr[0]);
    }
  }
  /**
   * Replaces the URL alias of a feature node.
   *
   * The alias is chosen by the 'chado_feature_url' variable: the feature name,
   * the feature unique name or, for any other value, the accession prefix
   * ('chado_feature_accession_prefix') followed by the feature_id.
   *
   * @param $node
   *   An object with the nid of the feature node.
   * @param $feature
   *   An object with the feature_id, name and uniquename of the feature.
   */
  function tripal_feature_set_feature_url($node, $feature) {
    // look up which kind of alias is wanted and the accession prefix
    $alias_type = variable_get('chado_feature_url', 'internal ID');
    $aprefix = variable_get('chado_feature_accession_prefix', 'ID');

    if ($alias_type == 'feature name') {
      $url_alias = $feature->name;
    }
    elseif ($alias_type == 'feature unique name') {
      $url_alias = $feature->uniquename;
    }
    else {
      $url_alias = $aprefix . $feature->feature_id;
    }

    $node_path = "node/$node->nid";
    print "Setting $alias_type as URL alias for $feature->name: $node_path => $url_alias\n";

    // drop whatever alias the node had before adding the new one
    db_query("DELETE FROM {url_alias} WHERE src = '%s'", $node_path);
    path_set_alias($node_path, $url_alias);
  }
  /**
   * Syncs features from chado to Drupal, one feature per system call.
   *
   * Features of the requested types that belong to synced organisms are read
   * from chado. Every feature that has no entry in the chado_feature table yet
   * is synced by running this file as a stand-alone script with -f.
   *
   * @param $max_sync
   *   Stop once this many features have been iterated over. 0 means no limit.
   * @param $organism_id
   *   Optional organism_id. When given, only features of this organism are
   *   synced.
   * @param $feature_types
   *   Optional whitespace separated list of sequence ontology term names. When
   *   empty, the 'chado_sync_feature_types' variable is used.
   * @param $job_id
   *   Optional job id whose progress is updated while syncing.
   *
   * @return
   *   Always an empty string.
   *
   * @ingroup tripal_feature
   */
  function tripal_feature_sync_features($max_sync = 0, $organism_id = NULL,
    $feature_types = NULL, $job_id = NULL) {

    // get the list of available sequence ontology terms for which
    // we will build drupal pages from features in chado. If a feature
    // is not one of the specified types we won't build a node for it.
    if (!$feature_types) {
      $allowed_types = variable_get('chado_sync_feature_types', 'gene contig');
    }
    else {
      $allowed_types = $feature_types;
    }

    // split on any run of whitespace and drop empty entries, so leading or
    // trailing whitespace cannot produce an empty term
    $so_terms = preg_split('/\s+/', $allowed_types, -1, PREG_SPLIT_NO_EMPTY);
    $allowed_types = implode(' ', $so_terms);
    print "Looking for features of type: $allowed_types\n";
    if (count($so_terms) == 0) {
      print "No feature types were given, nothing to sync\n";
      return '';
    }

    // get the list of organisms that are synced and only include features from
    // those organisms
    $org_ids = array();
    foreach (tripal_organism_get_synced() as $org) {
      if (!$org->organism_id) {
        continue;
      }
      if ($organism_id and $org->organism_id != $organism_id) {
        continue;
      }
      $org_ids[] = $org->organism_id;
    }
    if (count($org_ids) == 0) {
      // without this check the query below would have an empty condition
      print "No synced organism matches, nothing to sync\n";
      return '';
    }

    // use this SQL statement to get the features that we're going to upload.
    // The term names come from user input, so they are passed as query
    // arguments instead of being written into the statement.
    $sql = "SELECT feature_id ".
           "FROM {FEATURE} F ".
           "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
           "  INNER JOIN CV on CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name IN (" . db_placeholders($so_terms, 'varchar') . ") ".
           "  AND F.organism_id IN (" . db_placeholders($org_ids, 'int') . ") ".
           "  AND CV.name = 'sequence' ".
           "ORDER BY feature_id";
    $args = array_merge($so_terms, $org_ids);

    // get the list of features
    $previous_db = tripal_db_set_active('chado');  // use chado database
    $results = db_query($sql, $args);
    tripal_db_set_active($previous_db);  // now use drupal database

    // load into ids array
    $ids = array();
    while ($id = db_fetch_object($results)) {
      $ids[] = $id->feature_id;
    }
    $num_ids = count($ids);

    // make sure our vocabularies are set before proceeding
    tripal_feature_set_vocabulary();

    // pre-create the SQL statement that will be used to check
    // if a feature has already been synced. We skip features
    // that have been synced
    $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

    // update the job status every 1% of the features, but never use an
    // interval below 1 because it is the right hand side of a modulo
    $interval = intval($num_ids * 0.01);
    if ($interval < 1) {
      $interval = 1;
    }

    $script = drupal_get_path('module', 'tripal_feature') . "/includes/syncFeatures.inc";

    // Iterate through features that need to be synced
    $i = 0;
    foreach ($ids as $feature_id) {
      if ($job_id and $i % $interval == 0) {
        tripal_job_set_progress($job_id, intval(($i / $num_ids) * 100));
      }

      // if we have a maximum number to sync then stop when we get there
      // if not then just continue on
      if ($max_sync and $i == $max_sync) {
        return '';
      }

      if (!db_fetch_object(db_query($sql, $feature_id))) {
        // parsing all the features can cause memory overruns
        // we are not sure why PHP does not clean up the memory as it goes
        // to avoid this problem we will call this script through an
        // independent system call
        print "$i of $num_ids Syncing feature id: $feature_id\n";
        $cmd = "php " . escapeshellarg($script) . " -f " . escapeshellarg(intval($feature_id));
        system($cmd);
      }
      $i++;
    }

    return '';
  }
  /**
   * Syncs a single chado feature to a Drupal node.
   *
   * The feature and its synonyms are read from chado. Inconsistent leftovers
   * (a feature node without a chado_feature entry, or a chado_feature entry
   * without a node) are removed, a chado_feature node is created when none
   * exists, and finally the taxonomy and the URL alias of the node are set.
   *
   * @param $feature_id
   *   The feature_id of the chado feature to sync.
   *
   * @return
   *   An empty string, or the array of form errors when the new node does not
   *   validate.
   *
   * @ingroup tripal_feature
   */
  function tripal_feature_sync_feature($feature_id) {
    global $user;

    // set to 0 if the node exists and we just sync and not create
    $create_node = 1;

    // if we don't have a feature_id then return
    if (!$feature_id) {
      drupal_set_message(t("Please provide a feature_id to sync"));
      return '';
    }

    // get information about this feature
    $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
            "    O.species,CVT.name as cvname,F.residues,F.organism_id ".
            "FROM {FEATURE} F ".
            "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
            "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
            "WHERE F.feature_id = %d";
    $previous_db = tripal_db_set_active('chado');  // use chado database
    $feature = db_fetch_object(db_query($fsql, $feature_id));
    tripal_db_set_active($previous_db);  // now use drupal database

    // without a feature record there is nothing that can be synced
    if (!$feature) {
      drupal_set_message(t("%feature_id: The feature could not be found in chado... skipping", array('%feature_id' => $feature_id)));
      return '';
    }

    // get the synonyms for this feature
    $synsql = "SELECT S.name ".
              "FROM {feature_synonym} FS ".
              "  INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
              "WHERE FS.feature_id = %d";
    $previous_db = tripal_db_set_active('chado');  // use chado database
    $synonyms = db_query($synsql, $feature_id);
    tripal_db_set_active($previous_db);  // now use drupal database

    // now add these synonyms to the feature object as a single string
    $synstring = '';
    while ($synonym = db_fetch_object($synonyms)) {
      $synstring .= "$synonym->name\n";
    }
    $feature->synonyms = $synstring;

    // check to make sure that we don't have any feature nodes with this feature
    // name as a title but without a corresponding entry in the chado_feature
    // table. If so then we want to clean up that node. (If a node is found we
    // don't know if it belongs to our feature or not since features can have
    // the same name/title.) The lookup is limited to chado_feature nodes so
    // that unrelated content that merely shares the title is never deleted.
    $tsql = "SELECT * FROM {node} N ".
            "WHERE title = '%s' AND type = 'chado_feature'";
    $cnsql = "SELECT * FROM {chado_feature} ".
             "WHERE nid = %d";
    $nodes = db_query($tsql, $feature->name);
    // cycle through all nodes that may have this title
    while ($node = db_fetch_object($nodes)) {
      $feature_nid = db_fetch_object(db_query($cnsql, $node->nid));
      if (!$feature_nid) {
        drupal_set_message(t("%feature_id: A node is present but the chado_feature entry is missing... correcting", array('%feature_id' => $feature_id)));
        node_delete($node->nid);
      }
    }

    // check if this feature already exists in the chado_feature table.
    // if we have a chado feature, we want to check to see if we have a node
    $cfsql = "SELECT * FROM {chado_feature} ".
             "WHERE feature_id = %d";
    // @coder-ignore: don't need to use db_rewrite_sql() since need all nodes regardless of access control
    $nsql = "SELECT * FROM {node} ".
            "WHERE nid = %d";
    $chado_feature = db_fetch_object(db_query($cfsql, $feature->feature_id));
    if ($chado_feature) {
      drupal_set_message(t("%feature_id: A chado_feature entry exists", array('%feature_id' => $feature_id)));
      $node = db_fetch_object(db_query($nsql, $chado_feature->nid));
      if (!$node) {
        // if we have a chado_feature but not a node then we have a problem and
        // need to cleanup
        drupal_set_message(t("%feature_id: The node is missing, but has a chado_feature entry... correcting", array('%feature_id' => $feature_id)));
        $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
        db_query($df_sql, $feature_id);
      }
      else {
        drupal_set_message(t("%feature_id: A corresponding node exists", array('%feature_id' => $feature_id)));
        $create_node = 0;
      }
    }

    // if we've encountered an error then just return.
    if (db_error()) {
      return '';
    }

    // if a drupal node does not exist for this feature then we want to
    // create one.
    if ($create_node) {
      // variable values go in as placeholders, not into the translated string
      drupal_set_message(t("%feature_id: Creating node @name", array('%feature_id' => $feature_id, '@name' => $feature->name)));

      $new_node = new stdClass();
      $new_node->type = 'chado_feature';
      $new_node->uid = $user->uid;
      // genus and species were already selected together with the feature, so
      // no separate organism lookup is needed
      $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $feature->genus $feature->species";
      $new_node->fname = "$feature->name";
      $new_node->uniquename = "$feature->uniquename";
      $new_node->feature_id = $feature->feature_id;
      $new_node->residues = $feature->residues;
      $new_node->organism_id = $feature->organism_id;
      $new_node->feature_type = $feature->cvname;
      $new_node->synonyms = $feature->synonyms;

      // validate the node and if okay then submit
      node_validate($new_node);
      if ($errors = form_get_errors()) {
        foreach ($errors as $key => $msg) {
          drupal_set_message(t("%msg", array('%msg' => $msg)));
        }
        return $errors;
      }
      else {
        $node = node_submit($new_node);
        node_save($node);
      }
    }
    else {
      // the chado_feature record carries the nid that is needed below
      $node = $chado_feature;
    }

    // set the taxonomy for this node
    drupal_set_message(t("%feature_id (@nid): setting taxonomy", array('%feature_id' => $feature_id, '@nid' => $node->nid)));
    tripal_feature_set_taxonomy($node, $feature_id);

    // set the URL alias for this node
    tripal_feature_set_feature_url($node, $feature);

    return '';
  }