indexFeatures.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. <?php
  2. //
  3. // Copyright 2009 Clemson University
  4. //
  5. // This script can be run as a stand-alone script to add chado features to the Drupal search index
  6. //
  7. // To index a single feature
  8. // -i feature_id
  9. // -n node_id
  10. //
  11. // To index all features
  12. // -i 0
  // Command-line entry point.
  //   -i <feature_id>  feature to index; a value of 0 (or less) indexes all features
  //   -n <node_id>     optional node id that corresponds to the feature
  // Nothing happens unless -i is supplied, so including this file from
  // elsewhere only defines the functions below.
  $arguments = getopt("i:n:");
  if (isset($arguments['i'])) {
    // Fill in the $_SERVER entries below before bootstrapping. They are not set
    // when PHP runs from the command line; presumably the Drupal bootstrap
    // expects them (TODO confirm which ones are strictly required).
    $drupal_base_url = parse_url('http://www.example.com');
    $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
    $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
    $_SERVER['REMOTE_ADDR'] = NULL;
    $_SERVER['REQUEST_METHOD'] = NULL;

    // The path is relative, so the script is expected to be launched from the
    // Drupal root directory.
    require_once 'includes/bootstrap.inc';
    drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);

    $feature_id = $arguments['i'];
    // -n is optional (it is never passed with "-i 0"); reading it
    // unconditionally triggers an undefined-index notice. When it is missing
    // tripal_feature_index_feature() looks the node up from the feature id.
    $nid = isset($arguments['n']) ? $arguments['n'] : NULL;

    if ($feature_id > 0) {
      tripal_feature_index_feature($feature_id, $nid);
    }
    else {
      print "indexing all features...\n";
      tripal_features_reindex(0);
    }
  }
  37. /************************************************************************
  38. *
  39. */
  /**
   * Indexes every node that has a chado_feature record, one feature at a time.
   *
   * Each feature is indexed by launching this script again in a separate PHP
   * process ("php indexFeatures.php -i <feature_id> -n <nid>") because indexing
   * all features inside a single process was observed to exhaust memory.
   *
   * @param $max_sync
   *   Maximum number of features to index. A falsy value (e.g. 0) means no
   *   limit.
   * @param $job_id
   *   Optional job id. When provided, progress (0-100) is reported through
   *   tripal_job_set_progress() and printed roughly every 1% of the features.
   *
   * @return
   *   Always an empty string.
   */
  function tripal_features_reindex ($max_sync,$job_id = NULL){
    // We register a shutdown function to ensure that the nodes
    // that are indexed will have proper entries in the search_totals
    // table. Without these entries, the searching doesn't work
    // properly. This function may run for quite a while since
    // it must calculate the sum of the scores of all entries in
    // the search_index table. In the case of common words like
    // 'contig', this will take quite a while
    register_shutdown_function('search_update_totals');

    // Select every node that is linked to a chado feature. The table name is
    // wrapped in {} so that Drupal table prefixing is applied, consistent with
    // the query used by tripal_feature_index_feature().
    $sql = "SELECT N.nid, N.title, CF.feature_id ".
           "FROM {node} N ".
           "  INNER JOIN {chado_feature} CF ON CF.nid = N.nid ";
    $results = db_query($sql);

    // Load all records up front so the total is known for progress reporting.
    $chado_features = array();
    while ($chado_feature = db_fetch_object($results)) {
      $chado_features[] = $chado_feature;
    }
    $count = count($chado_features);

    // Report progress every 1% of the features. With fewer than 100 features
    // 1% truncates to 0, which would make the modulo below a division by
    // zero, so the interval is never allowed to drop below 1.
    $interval = max(1, intval($count * 0.01));

    // The script path does not change between iterations; build it once.
    $script = drupal_get_path('module', 'tripal_feature') . "/indexFeatures.php";

    $i = 0;
    foreach ($chado_features as $chado_feature) {
      // update the job status every 1% features
      if ($job_id and $i % $interval == 0) {
        $prog = intval(($i / $count) * 100);
        tripal_job_set_progress($job_id, $prog);
        print "$prog\n";
      }

      // sync only the max requested
      if ($max_sync and $i == $max_sync) {
        return '';
      }
      $i++;

      // Parsing all the features in one process can cause memory overruns, so
      // each feature is indexed through an independent system call. The path
      // is shell-escaped and the ids are formatted as integers so nothing
      // unexpected can reach the shell.
      $cmd = sprintf(
        "php %s -i %d -n %d",
        escapeshellarg($script),
        $chado_feature->feature_id,
        $chado_feature->nid
      );
      system($cmd);
    }
    return '';
  }
  94. /************************************************************************
  95. *
  96. */
  /**
   * Adds a single chado feature node to the Drupal search index.
   *
   * @param $feature_id
   *   The chado feature id. Nothing is indexed when this is empty.
   * @param $nid
   *   The id of the node for the feature. When empty, the node id is looked
   *   up in the chado_feature table using $feature_id.
   *
   * @return
   *   1 when the node was passed to search_index(), 0 when no feature id was
   *   given or no node could be found/loaded.
   */
  function tripal_feature_index_feature ($feature_id,$nid){
    // return if we haven't been provided with a feature_id
    if (!$feature_id) {
      return 0;
    }

    // if we only have a feature_id then let's find a corresponding
    // node. If we can't find a node then return.
    if (!$nid) {
      $nsql = "SELECT N.nid,N.title FROM {chado_feature} CF ".
              "  INNER JOIN {node} N ON N.nid = CF.nid ".
              "WHERE CF.feature_id = %d";
      $record = db_fetch_object(db_query($nsql, $feature_id));
      if (!$record) {
        return 0;
      }
      $nid = $record->nid;
    }

    // The nid may be stale or invalid (e.g. passed on the command line); bail
    // out instead of dereferencing a failed load.
    $node = node_load($nid);
    if (!$node) {
      return 0;
    }

    // Build the node content in search-index mode, then append whatever the
    // comment and taxonomy modules contribute for indexing.
    $node->build_mode = NODE_BUILD_SEARCH_INDEX;
    $node = node_build_content($node, FALSE, FALSE);
    $node->body = drupal_render($node->content);
    node_invoke_nodeapi($node, 'view', FALSE, FALSE);
    $node->body .= module_invoke('comment', 'nodeapi', $node, 'update index');
    $node->body .= module_invoke('taxonomy','nodeapi', $node, 'update index');

    search_index($node->nid, 'node', $node->body);
    return 1;
  }
  132. ?>