indexFeatures.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. <?php
  2. // This script can be run as a stand-alone script to index chado features for the Drupal search
  3. //
  4. // To index a single feature
  5. // -i feature_id
  6. // -n node_id
  7. //
  8. // To index all features
  9. // -i 0
  10. $arguments = getopt("i:n:");
  11. if(isset($arguments['i'])){
  12. $drupal_base_url = parse_url('http://www.example.com');
  13. $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  14. $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  15. $_SERVER['REMOTE_ADDR'] = NULL;
  16. $_SERVER['REQUEST_METHOD'] = NULL;
  17. require_once 'includes/bootstrap.inc';
  18. drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  19. $feature_id = $arguments['i'];
  20. $nid = $arguments['n'];
  21. # print "\n";
  22. # print "feature id is $feature_id\n";
  23. # print "nid is $nid\n";
  24. # print "\n";
  25. if($feature_id > 0){
  26. # print "indexing feature $feature_id\n";
  27. // We register a shutdown function to ensure that the nodes
  28. // that are indexed will have proper entries in the search_totals
  29. // table. Without these entries, the searching doesn't work
  30. // properly. This function may run for quite a while since
  31. // it must calculate the sum of the scores of all entries in
  32. // the search_index table. In the case of common words like
  33. // 'contig', this will take quite a while
  34. register_shutdown_function('search_update_totals');
  35. tripal_feature_index_feature($feature_id, $nid);
  36. }
  37. else{
  38. print "indexing all features...\n";
  39. tripal_features_reindex(0);
  40. }
  41. }
  42. /************************************************************************
  43. *
  44. */
  45. function tripal_features_reindex ($max_sync,$job_id = NULL){
  46. $i = 0;
  47. // We register a shutdown function to ensure that the nodes
  48. // that are indexed will have proper entries in the search_totals
  49. // table. Without these entries, the searching doesn't work
  50. // properly. This function may run for quite a while since
  51. // it must calculate the sum of the scores of all entries in
  52. // the search_index table. In the case of common words like
  53. // 'contig', this will take quite a while
  54. register_shutdown_function('search_update_totals');
  55. // use this SQL statement to get the features that we're going to index. This
  56. // SQL statement is derived from the hook_search function in the Drupal API.
  57. // Essentially, this is the SQL statement that finds all nodes that need
  58. // reindexing, but adjusted to include the chado_feature
  59. $sql = "SELECT N.nid, N.title, CF.feature_id ".
  60. "FROM {node} N ".
  61. " INNER JOIN chado_feature CF ON CF.nid = N.nid ";
  62. $results = db_query($sql);
  63. // load into ids array
  64. $count = 0;
  65. $chado_features = array();
  66. while($chado_feature = db_fetch_object($results)){
  67. $chado_features[$count] = $chado_feature;
  68. $count++;
  69. }
  70. // Iterate through features that need to be indexed
  71. $interval = intval($count * 0.01);
  72. if($interval >= 0){
  73. $interval = 1;
  74. }
  75. foreach($chado_features as $chado_feature){
  76. // update the job status every 1% features
  77. if($job_id and $i % $interval == 0){
  78. $prog = intval(($i/$count)*100);
  79. tripal_job_set_progress($job_id,$prog);
  80. print "$prog\n";
  81. }
  82. // sync only the max requested
  83. if($max_sync and $i == $max_sync){
  84. return '';
  85. }
  86. $i++;
  87. # tripal_feature_index_feature ($chado_feature->feature_id,$chado_feature->nid);
  88. # parsing all the features can cause memory overruns
  89. # we are not sure why PHP does not clean up the memory as it goes
  90. # to avoid this problem we will call this script through an
  91. # independent system call
  92. $cmd = "php " . drupal_get_path('module', 'tripal_feature') . "/indexFeatures.php ";
  93. $cmd .= "-i $chado_feature->feature_id -n $chado_feature->nid ";
  94. # print "\t$cmd\n";
  95. # print "\tfeature id is $chado_feature->feature_id\n";
  96. # print "\tnid is $chado_feature->nid\n";
  97. # print "\n";
  98. system($cmd);
  99. }
  100. return '';
  101. }
  102. /************************************************************************
  103. *
  104. */
  105. function tripal_feature_index_feature ($feature_id,$nid){
  106. #print "\tfeature $feature_id nid $nid\n";
  107. // return if we haven't been provided with a feature_id
  108. if(!$feature_id){
  109. return 0;
  110. }
  111. // if we only have a feature_id then let's find a corresponding
  112. // node. If we can't find a node then return.
  113. if(!$nid){
  114. $nsql = "SELECT N.nid,N.title FROM {chado_feature} CF ".
  115. " INNER JOIN {node} N ON N.nid = CF.nid ".
  116. "WHERE CF.feature_id = %d";
  117. $node = db_fetch_object(db_query($nsql,$feature_id));
  118. if(!$node){
  119. return 0;
  120. }
  121. $node = node_load($node->nid);
  122. } else {
  123. $node = node_load($nid);
  124. }
  125. // node load the noad, the comments and the taxonomy and
  126. // index
  127. $node->build_mode = NODE_BUILD_SEARCH_INDEX;
  128. $node = node_build_content($node, FALSE, FALSE);
  129. $node->body = drupal_render($node->content);
  130. node_invoke_nodeapi($node, 'view', FALSE, FALSE);
  131. $node->body .= module_invoke('comment', 'nodeapi', $node, 'update index');
  132. $node->body .= module_invoke('taxonomy','nodeapi', $node, 'update index');
  133. // print "$node->title: $node->body\n";
  134. search_index($node->nid,'node',$node->body);
  135. # $mem = memory_get_usage(TRUE);
  136. # $mb = $mem/1048576;
  137. # print "$mb mb\n";
  138. return 1;
  139. }
  140. ?>