syncFeatures.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. <?php
  2. # This script can be run as a stand-alone script to sync all the features from chado to drupal
  3. // Parameter f specifies the feature_id to sync
  4. // -f 0 will sync all features
  5. $arguments = getopt("f:");
  6. if(isset($arguments['f'])){
  7. $drupal_base_url = parse_url('http://www.example.com');
  8. $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  9. $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  10. $_SERVER['REMOTE_ADDR'] = NULL;
  11. $_SERVER['REQUEST_METHOD'] = NULL;
  12. require_once 'includes/bootstrap.inc';
  13. drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  14. $feature_id = $arguments['f'];
  15. if($feature_id > 0 ){
  16. tripal_feature_sync_feature($feature_id);
  17. }
  18. else{
  19. print "syncing all features...\n";
  20. tripal_feature_sync_features();
  21. }
  22. }
  23. /**
  24. *
  25. */
  26. function tripal_feature_set_urls($job_id = NULL){
  27. // first get the list of features that have been synced
  28. $sql = "SELECT * FROM {chado_feature}";
  29. $nodes = db_query($sql);
  30. while($node = db_fetch_object($nodes)){
  31. // now get the feature details
  32. $feature_arr = tripal_core_chado_select('feature',
  33. array('feature_id','name','uniquename'),
  34. array('feature_id' => $node->feature_id));
  35. $feature = $feature_arr[0];
  36. tripal_feature_set_feature_url($node,$feature);
  37. }
  38. }
  39. /**
  40. *
  41. */
  42. function tripal_feature_set_feature_url($node,$feature){
  43. // determine which URL alias to use
  44. $alias_type = variable_get('chado_feature_url','internal ID');
  45. $aprefix = variable_get('chado_feature_accession_prefix','ID');
  46. switch ($alias_type) {
  47. case 'feature name':
  48. $url_alias = $feature->name;
  49. break;
  50. case 'feature unique name':
  51. $url_alias = $feature->uniquename;
  52. break;
  53. default:
  54. $url_alias = "$aprefix$feature->feature_id";
  55. }
  56. print "Setting $alias_type as URL alias for $feature->name: node/$node->nid => $url_alias\n";
  57. // remove any previous alias
  58. db_query("DELETE FROM {url_alias} WHERE src = '%s'", "node/$node->nid");
  59. // add the new alias
  60. path_set_alias("node/$node->nid",$url_alias);
  61. }
  62. /**
  63. *
  64. *
  65. * @ingroup tripal_feature
  66. */
  67. function tripal_feature_sync_features ($max_sync = 0, $job_id = NULL){
  68. //print "Syncing features (max of $max_sync)\n";
  69. $i = 0;
  70. // get the list of available sequence ontology terms for which
  71. // we will build drupal pages from features in chado. If a feature
  72. // is not one of the specified typse we won't build a node for it.
  73. $allowed_types = variable_get('chado_feature_types','EST contig');
  74. $allowed_types = preg_replace("/[\s\n\r]+/"," ",$allowed_types);
  75. print "Looking for features of type: $allowed_types\n";
  76. $so_terms = split(' ',$allowed_types);
  77. $where_cvt = "";
  78. foreach ($so_terms as $term){
  79. $where_cvt .= "CVT.name = '$term' OR ";
  80. }
  81. $where_cvt = substr($where_cvt,0,strlen($where_cvt)-3); # strip trailing 'OR'
  82. // get the list of organisms that are synced and only include features from
  83. // those organisms
  84. $orgs = organism_get_synced();
  85. $where_org = "";
  86. foreach($orgs as $org){
  87. if($org->organism_id){
  88. $where_org .= "F.organism_id = $org->organism_id OR ";
  89. }
  90. }
  91. $where_org = substr($where_org,0,strlen($where_org)-3); # strip trailing 'OR'
  92. // use this SQL statement to get the features that we're going to upload
  93. $sql = "SELECT feature_id ".
  94. "FROM {FEATURE} F ".
  95. " INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
  96. " INNER JOIN CV on CV.cv_id = CVT.cv_id ".
  97. "WHERE ($where_cvt) AND ($where_org) AND CV.name = 'sequence' ".
  98. "ORDER BY feature_id";
  99. // get the list of features
  100. $previous_db = tripal_db_set_active('chado'); // use chado database
  101. $results = db_query($sql);
  102. tripal_db_set_active($previous_db); // now use drupal database
  103. // load into ids array
  104. $count = 0;
  105. $ids = array();
  106. while($id = db_fetch_object($results)){
  107. $ids[$count] = $id->feature_id;
  108. $count++;
  109. }
  110. // make sure our vocabularies are set before proceeding
  111. tripal_feature_set_vocabulary();
  112. // pre-create the SQL statement that will be used to check
  113. // if a feature has already been synced. We skip features
  114. // that have been synced
  115. $sql = "SELECT * FROM {chado_feature} WHERE feature_id = %d";
  116. // Iterate through features that need to be synced
  117. $interval = intval($count * 0.01);
  118. $num_ids = sizeof($ids);
  119. $i = 0;
  120. foreach($ids as $feature_id){
  121. // update the job status every 1% features
  122. if($job_id and $i % $interval == 0){
  123. tripal_job_set_progress($job_id,intval(($i/$count)*100));
  124. }
  125. // if we have a maximum number to sync then stop when we get there
  126. // if not then just continue on
  127. if($max_sync and $i == $max_sync){
  128. return '';
  129. }
  130. if(!db_fetch_object(db_query($sql,$feature_id))){
  131. # parsing all the features can cause memory overruns
  132. # we are not sure why PHP does not clean up the memory as it goes
  133. # to avoid this problem we will call this script through an
  134. # independent system call
  135. print "$i of $num_ids Syncing feature id: $feature_id\n";
  136. $cmd = "php " . drupal_get_path('module', 'tripal_feature') . "/syncFeatures.php -f $feature_id ";
  137. system($cmd);
  138. }
  139. $i++;
  140. }
  141. return '';
  142. }
  143. /**
  144. *
  145. *
  146. * @ingroup tripal_feature
  147. */
  148. function tripal_feature_sync_feature ($feature_id){
  149. // print "\tSyncing feature $feature_id\n";
  150. $mem = memory_get_usage(TRUE);
  151. $mb = $mem/1048576;
  152. // print "$mb mb\n";
  153. global $user;
  154. $create_node = 1; // set to 0 if the node exists and we just sync and not create
  155. // get the accession prefix
  156. $aprefix = variable_get('chado_feature_accession_prefix','ID');
  157. // if we don't have a feature_id then return
  158. if(!$feature_id){
  159. drupal_set_message(t("Please provide a feature_id to sync"));
  160. return '';
  161. }
  162. // get information about this feature
  163. $fsql = "SELECT F.feature_id, F.name, F.uniquename,O.genus, ".
  164. " O.species,CVT.name as cvname,F.residues,F.organism_id ".
  165. "FROM {FEATURE} F ".
  166. " INNER JOIN Cvterm CVT ON F.type_id = CVT.cvterm_id ".
  167. " INNER JOIN Organism O ON F.organism_id = O.organism_ID ".
  168. "WHERE F.feature_id = %d";
  169. $previous_db = tripal_db_set_active('chado'); // use chado database
  170. $feature = db_fetch_object(db_query($fsql,$feature_id));
  171. tripal_db_set_active($previous_db); // now use drupal database
  172. // get the synonyms for this feature
  173. $synsql = "SELECT S.name ".
  174. "FROM {feature_synonym} FS ".
  175. " INNER JOIN {synonym} S on FS.synonym_id = S.synonym_id ".
  176. "WHERE FS.feature_id = %d";
  177. $previous_db = tripal_db_set_active('chado'); // use chado database
  178. $synonyms = db_query($synsql,$feature_id);
  179. tripal_db_set_active($previous_db); // now use drupal database
  180. // now add these synonyms to the feature object as a single string
  181. $synstring = '';
  182. while($synonym = db_fetch_object($synonyms)){
  183. $synstring .= "$synonym->name\n";
  184. }
  185. $feature->synonyms = $synstring;
  186. // check to make sure that we don't have any nodes with this feature name as a title
  187. // but without a corresponding entry in the chado_feature table if so then we want to
  188. // clean up that node. (If a node is found we don't know if it belongs to our feature or
  189. // not since features can have the same name/title.)
  190. $tsql = "SELECT * FROM {node} N ".
  191. "WHERE title = '%s'";
  192. $cnsql = "SELECT * FROM {chado_feature} ".
  193. "WHERE nid = %d";
  194. $nodes = db_query($tsql,$feature->name);
  195. // cycle through all nodes that may have this title
  196. while($node = db_fetch_object($nodes)){
  197. $feature_nid = db_fetch_object(db_query($cnsql,$node->nid));
  198. if(!$feature_nid){
  199. drupal_set_message(t("$feature_id: A node is present but the chado_feature entry is missing... correcting"));
  200. node_delete($node->nid);
  201. }
  202. }
  203. // check if this feature already exists in the chado_feature table.
  204. // if we have a chado feature, we want to check to see if we have a node
  205. $cfsql = "SELECT * FROM {chado_feature} ".
  206. "WHERE feature_id = %d";
  207. $nsql = "SELECT * FROM {node} ".
  208. "WHERE nid = %d";
  209. $chado_feature = db_fetch_object(db_query($cfsql,$feature->feature_id));
  210. if($chado_feature){
  211. drupal_set_message(t("$feature_id: A chado_feature entry exists"));
  212. $node = db_fetch_object(db_query($nsql,$chado_feature->nid));
  213. if(!$node){
  214. // if we have a chado_feature but not a node then we have a problem and
  215. // need to cleanup
  216. drupal_set_message(t("$feature_id: The node is missing, but has a chado_feature entry... correcting"));
  217. $df_sql = "DELETE FROM {chado_feature} WHERE feature_id = %d";
  218. db_query($df_sql,$feature_id);
  219. } else {
  220. drupal_set_message(t("$feature_id: A corresponding node exists"));
  221. $create_node = 0;
  222. }
  223. }
  224. // if we've encountered an error then just return.
  225. if($error_msg = db_error()){
  226. //print "$error_msg\n";
  227. return '';
  228. }
  229. // if a drupal node does not exist for this feature then we want to
  230. // create one. Note that the node_save call in this block
  231. // will call the hook_submit function which
  232. if($create_node){
  233. // get the organism for this feature
  234. $sql = "SELECT * FROM {organism} WHERE organism_id = %d";
  235. $organism = db_fetch_object(db_query($sql,$feature->organism_id));
  236. drupal_set_message(t("$feature_id: Creating node $feature->name"));
  237. $new_node = new stdClass();
  238. $new_node->type = 'chado_feature';
  239. $new_node->uid = $user->uid;
  240. $new_node->title = "$feature->name, $feature->uniquename ($feature->cvname) $organism->genus $organism->species";
  241. $new_node->fname = "$feature->name";
  242. $new_node->uniquename = "$feature->uniquename";
  243. $new_node->feature_id = $feature->feature_id;
  244. $new_node->residues = $feature->residues;
  245. $new_node->organism_id = $feature->organism_id;
  246. $new_node->feature_type = $feature->cvname;
  247. $new_node->synonyms = $feature->synonyms;
  248. // validate the node and if okay then submit
  249. node_validate($new_node);
  250. if ($errors = form_get_errors()) {
  251. foreach($errors as $key => $msg){
  252. drupal_set_message($msg);
  253. }
  254. return $errors;
  255. } else {
  256. $node = node_submit($new_node);
  257. node_save($node);
  258. }
  259. }
  260. else {
  261. $node = $chado_feature;
  262. }
  263. // set the taxonomy for this node
  264. drupal_set_message(t("$feature_id ($node->nid): setting taxonomy"));
  265. tripal_feature_set_taxonomy($node,$feature_id);
  266. // reindex the node
  267. // drupal_set_message(t("$feature_id( $node->nid): indexing"));
  268. // tripal_feature_index_feature ($feature_id,$node->nid);
  269. // set the URL alias for this node
  270. tripal_feature_set_feature_url($node,$feature);
  271. return '';
  272. }
  273. /**
  274. * Returns a list of organisms that are currently synced with Drupal
  275. *
  276. * @ingroup tripal_feature
  277. */
  278. function organism_get_synced() {
  279. // use this SQL for getting synced organisms
  280. $dsql = "SELECT * FROM {chado_organism}";
  281. $orgs = db_query($dsql);
  282. // use this SQL statement for getting the organisms
  283. $csql = "SELECT * FROM {Organism} ".
  284. "WHERE organism_id = %d";
  285. $org_list = array();
  286. // iterate through the organisms and build an array of those that are synced
  287. while($org = db_fetch_object($orgs)){
  288. $previous_db = tripal_db_set_active('chado'); // use chado database
  289. $info = db_fetch_object(db_query($csql,$org->organism_id));
  290. tripal_db_set_active($previous_db); // now use drupal database
  291. $org_list[] = $info;
  292. }
  293. return $org_list;
  294. }
  295. ?>