syncFeatures.php 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. <?php
  2. //
  3. // Copyright 2009 Clemson University
  4. //
  5. # This script can be run as a stand-alone script to sync all the features from chado to drupal
  6. // Parameter f specifies the feature_id to sync
  7. // -f 0 will sync all features
  // Stand-alone entry point: nothing below runs unless the script was started
  // with the -f command line option.
  $arguments = getopt("f:");
  if (isset($arguments['f'])) {
    // Fill in the $_SERVER entries by hand before bootstrapping Drupal
    // (presumably the values a web request would normally supply).
    $drupal_base_url = parse_url('http://www.example.com');
    $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
    $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
    $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'];
    $_SERVER['REMOTE_ADDR'] = $_SERVER['REQUEST_METHOD'] = NULL;

    // The bootstrap file is referenced by a relative path, so it is resolved
    // against the include path / current working directory.
    require_once 'includes/bootstrap.inc';
    drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

    // A positive -f value syncs that single feature; anything else syncs all.
    $feature_id = $arguments['f'];
    if (!($feature_id > 0)) {
      print "syncing all features...\n";
      tripal_feature_sync_features();
    }
    else {
      print "syncing feature $feature_id\n";
      tripal_feature_sync_feature($feature_id);
    }
  }
  /************************************************************************
   * Syncs chado features to Drupal.
   *
   * Selects every chado feature whose type is listed in the
   * 'chado_feature_types' variable and whose organism is returned by
   * organism_get_synced(), then launches this script in a separate PHP
   * process (with -f <feature_id>) for each feature that has no row in the
   * {chado_feature} table yet.
   *
   * @param $max_sync
   *   When non-zero, processing stops once this many features have been
   *   iterated over (already-synced features count towards the limit).
   * @param $job_id
   *   When set, progress is reported through tripal_job_set_progress().
   *
   * @return
   *   Always an empty string.
   */
  function tripal_feature_sync_features ($max_sync = 0, $job_id = NULL) {
    $i = 0;

    // Get the list of available sequence ontology terms for which we will
    // build drupal pages from features in chado. If a feature is not one of
    // the specified types we won't build a node for it.
    $allowed_types = variable_get('chado_feature_types', 'EST contig');
    $so_terms = preg_split('/\s+/', $allowed_types, -1, PREG_SPLIT_NO_EMPTY);
    if (!$so_terms) {
      // Without any type there is nothing to select.
      return '';
    }

    // Values are handed to db_query() as placeholder arguments rather than
    // being interpolated into the SQL text, so they get escaped properly.
    $args = array();
    $cvt_clauses = array();
    foreach ($so_terms as $term) {
      $cvt_clauses[] = "CVT.name = '%s'";
      $args[] = $term;
    }

    // Get the list of organisms that are synced and only include features
    // from those organisms.
    $org_clauses = array();
    foreach (organism_get_synced() as $org) {
      if (!$org) {
        // Skip entries for organisms that could not be loaded.
        continue;
      }
      $org_clauses[] = "F.organism_id = %d";
      $args[] = $org->organism_id;
    }
    if (!$org_clauses) {
      // No synced organism: an empty "AND ()" clause would be invalid SQL.
      return '';
    }

    // Use this SQL statement to get the features that we're going to upload.
    // The placeholder order (types first, organisms second) matches $args.
    $sql = "SELECT feature_id ".
           "FROM {FEATURE} F ".
           "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
           "WHERE (" . implode(' OR ', $cvt_clauses) . ") ".
           "AND (" . implode(' OR ', $org_clauses) . ") ".
           "ORDER BY feature_id";

    // Get the list of features.
    $previous_db = db_set_active('chado');  // use chado database
    $results = db_query($sql, $args);
    db_set_active($previous_db);            // now use drupal database

    // Load into ids array.
    $ids = array();
    while ($id = db_fetch_object($results)) {
      $ids[] = $id->feature_id;
    }
    $count = count($ids);

    // Make sure our vocabularies are set before proceeding.
    tripal_feature_set_vocabulary();

    // Pre-create the SQL statement that will be used to check if a feature
    // has already been synced. We skip features that have been synced.
    $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";

    // Update the job status every 1% of the features. The interval is kept
    // at 1 or more so the modulo below never divides by zero when there are
    // fewer than 100 features.
    $interval = max(1, intval($count * 0.01));

    // Iterate through features that need to be synced.
    foreach ($ids as $feature_id) {
      if ($job_id and $i % $interval == 0) {
        tripal_job_set_progress($job_id, intval(($i / $count) * 100));
      }

      // If we have a maximum number to sync then stop when we get there,
      // if not then just continue on.
      if ($max_sync and $i == $max_sync) {
        return '';
      }

      if (!db_fetch_object(db_query($sql, $feature_id))) {
        # parsing all the features can cause memory overruns
        # we are not sure why PHP does not clean up the memory as it goes
        # to avoid this problem we will call this script through an
        # independent system call
        $script = drupal_get_path('module', 'tripal_feature') . "/syncFeatures.php";
        $cmd = "php " . escapeshellarg($script) . " -f " . intval($feature_id);
        system($cmd);
      }
      $i++;
    }
    return '';
  }
  /**
   * Syncs one chado feature to Drupal.
   *
   * Looks the feature up in chado, removes chado_feature nodes that carry
   * the feature's name as title but have no {chado_feature} row, repairs a
   * {chado_feature} row whose node is missing, creates the node when none
   * exists, then sets the taxonomy and the URL alias
   * (<accession prefix><feature_id>) for the node.
   *
   * @param $feature_id
   *   The chado feature_id to sync.
   *
   * @return
   *   The array returned by form_get_errors() when validation of a newly
   *   built node fails, otherwise an empty string.
   */
  function tripal_feature_sync_feature ($feature_id) {
    // Progress and memory usage output for the command line.
    print "\tfeature $feature_id\n";
    $mem = memory_get_usage(TRUE);
    $mb = $mem/1048576;
    print "$mb mb\n";

    global $user;
    $create_node = 1;   // set to 0 if the node exists and we just sync and not create

    // Get the accession prefix.
    $aprefix = variable_get('chado_feature_accession_prefix', 'ID');

    // If we don't have a feature_id then return.
    if (!$feature_id) {
      drupal_set_message(t("Please provide a feature_id to sync"));
      return '';
    }

    // Get information about this feature.
    $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
            "  O.species,CVT.name as cvname,F.residues,F.organism_id ".
            "FROM {FEATURE} F ".
            "  INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
            "  INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
            "WHERE F.feature_id = %d";
    $previous_db = db_set_active('chado');  // use chado database
    $feature = db_fetch_object(db_query($fsql, $feature_id));
    db_set_active($previous_db);            // now use drupal database

    // Without a chado record there is nothing to build a node from.
    if (!$feature) {
      drupal_set_message(t('@id: No such feature was found in chado', array('@id' => $feature_id)), 'error');
      return '';
    }

    // Check to make sure that we don't have any feature nodes with this
    // feature name as a title but without a corresponding entry in the
    // chado_feature table; if so then we want to clean up that node. (If a
    // node is found we don't know if it belongs to our feature or not since
    // features can have the same name/title.) The lookup is limited to
    // chado_feature nodes so that unrelated content which merely shares the
    // title is never deleted.
    $tsql = "SELECT * FROM {node} N ".
            "WHERE title = '%s' AND type = 'chado_feature'";
    $cnsql = "SELECT * FROM {chado_feature} ".
             "WHERE nid = %d";
    $nodes = db_query($tsql, $feature->name);
    while ($node = db_fetch_object($nodes)) {
      $feature_nid = db_fetch_object(db_query($cnsql, $node->nid));
      if (!$feature_nid) {
        drupal_set_message(t('@id: A node is present but the chado_feature entry is missing... correcting', array('@id' => $feature_id)));
        node_delete($node->nid);
      }
    }

    // Check if this feature already exists in the chado_feature table.
    // If we have a chado feature, we want to check to see if we have a node.
    $cfsql = "SELECT * FROM {chado_feature} ".
             "WHERE feature_id = %d";
    $nsql = "SELECT * FROM {node} ".
            "WHERE nid = %d";
    $chado_feature = db_fetch_object(db_query($cfsql, $feature->feature_id));
    if ($chado_feature) {
      drupal_set_message(t('@id: A chado_feature entry exists', array('@id' => $feature_id)));
      $node = db_fetch_object(db_query($nsql, $chado_feature->nid));
      if (!$node) {
        // If we have a chado_feature but not a node then we have a problem
        // and need to cleanup.
        drupal_set_message(t('@id: The node is missing, but has a chado_feature entry... correcting', array('@id' => $feature_id)));
        $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
        db_query($df_sql, $feature_id);
      }
      else {
        drupal_set_message(t('@id: A corresponding node exists', array('@id' => $feature_id)));
        $create_node = 0;
      }
    }

    // If we've encountered an error then just return.
    if ($error_msg = db_error()) {
      //print "$error_msg\n";
      return '';
    }

    // If a drupal node does not exist for this feature then we want to
    // create one.
    if ($create_node) {
      drupal_set_message(t('@id: Creating node @name', array('@id' => $feature_id, '@name' => $feature->name)));
      $new_node = new stdClass();
      $new_node->type = 'chado_feature';
      $new_node->uid = $user->uid;
      $new_node->title = "$feature->name";
      $new_node->feature_id = $feature->feature_id;
      $new_node->residues = $feature->residues;
      $new_node->organism_id = $feature->organism_id;
      $new_node->feature_type = $feature->cvname;

      // Validate the node and if okay then submit.
      node_validate($new_node);
      if ($errors = form_get_errors()) {
        foreach ($errors as $key => $msg) {
          drupal_set_message($msg);
        }
        return $errors;
      }
      $node = node_submit($new_node);
      node_save($node);
    }
    else {
      // Continue with the {chado_feature} row; it supplies the nid used below.
      $node = $chado_feature;
    }

    // Set the taxonomy for this node.
    drupal_set_message(t('@id (@nid): setting taxonomy', array('@id' => $feature_id, '@nid' => $node->nid)));
    tripal_feature_set_taxonomy($node, $feature_id);

    // reindex the node
    // drupal_set_message(t("$feature_id( $node->nid): indexing"));
    // tripal_feature_index_feature ($feature_id,$node->nid);

    // Remove any URL alias that may already exist and recreate.
    drupal_set_message(t('@id (@nid): setting URL alias', array('@id' => $feature_id, '@nid' => $node->nid)));
    db_query("DELETE FROM {url_alias} WHERE dst = '%s'", "$aprefix$feature_id");
    path_set_alias("node/$node->nid", "$aprefix$feature_id");
    return '';
  }
  /*******************************************************************************
   * Returns a list of organisms that are currently synced with Drupal.
   *
   * Every row of the Drupal {chado_organism} table is looked up in the chado
   * Organism table by its organism_id. Rows without a matching chado record
   * are left out, so every element of the result is a record object.
   *
   * @return
   *   An array of chado organism record objects (possibly empty).
   */
  function organism_get_synced() {
    // Use this SQL for getting synced organisms.
    $dsql = "SELECT * FROM {chado_organism}";
    $orgs = db_query($dsql);

    // Use this SQL statement for getting the organisms.
    $csql = "SELECT * FROM {Organism} ".
            "WHERE organism_id = %d";

    $org_list = array();

    // Iterate through the organisms and build an array of those that are synced.
    while ($org = db_fetch_object($orgs)) {
      $previous_db = db_set_active('chado');  // use chado database
      $info = db_fetch_object(db_query($csql, $org->organism_id));
      db_set_active($previous_db);            // now use drupal database

      // db_fetch_object() yields FALSE when chado has no such organism; do
      // not hand that on to callers, which read ->organism_id from each entry.
      if ($info) {
        $org_list[] = $info;
      }
    }
    return $org_list;
  }
  226. ?>