tripal_stock.sync_stocks.inc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. <?php
  2. /**
  3. * @file
  4. * @todo Add file header description
  5. */
  6. # This script can be run as a stand-alone script to sync all the stocks from chado to drupal
  7. // Parameter f specifies the stock_id to sync
  8. // -f 0 will sync all stocks
  9. $arguments = getopt("f:t:");
  10. if (isset($arguments['f']) and isset($arguments['t']) and $arguments['t'] == 'chado_stock') {
  11. $drupal_base_url = parse_url('http://www.example.com');
  12. $_SERVER['HTTP_HOST'] = $drupal_base_url['host'];
  13. $_SERVER['REQUEST_URI'] = $_SERVER['SCRIPT_NAME'] = $_SERVER['PHP_SELF'];
  14. $_SERVER['REMOTE_ADDR'] = NULL;
  15. $_SERVER['REQUEST_METHOD'] = NULL;
  16. require_once 'includes/bootstrap.inc';
  17. drupal_bootstrap(DRUPAL_BOOTSTRAP_FULL);
  18. $stock_id = $arguments['f'];
  19. if ($stock_id > 0 ) {
  20. tripal_stock_sync_stock($stock_id);
  21. }
  22. else{
  23. print "syncing all stocks...\n";
  24. tripal_stock_sync_stocks();
  25. }
  26. }
  27. /**
  28. *
  29. */
  30. function tripal_stock_sync_form() {
  31. $form['description'] = array(
  32. '#type' => 'item',
  33. '#value' => t("Stocks of the types listed ".
  34. "below in the Stock Types box will be synced (leave blank to sync all types). You may limit the ".
  35. "stocks to be synced by a specific organism. Depending on the ".
  36. "number of stocks in the chado database this may take a long ".
  37. "time to complete. "),
  38. );
  39. $form['stock_types'] = array(
  40. '#title' => t('Stock Types'),
  41. '#type' => 'textarea',
  42. '#description' => t("Enter the names of the stock types to sync. " .
  43. "Leave blank to sync all stocks. Pages for these stock ".
  44. "types will be created automatically for stocks that exist in the ".
  45. "chado database. The names listed here should be spearated by ".
  46. "spaces or entered separately on new lines. The names must match ".
  47. "exactly (spelling and case) with terms in the sequence ontology"),
  48. '#default_value' => variable_get('chado_sync_stock_types', ''),
  49. );
  50. // get the list of organisms
  51. $sql = "SELECT * FROM {organism} ORDER BY genus, species";
  52. $orgs = tripal_organism_get_synced();
  53. $organisms[] = '';
  54. foreach ($orgs as $organism) {
  55. $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
  56. }
  57. $form['organism_id'] = array(
  58. '#title' => t('Organism'),
  59. '#type' => t('select'),
  60. '#description' => t("Choose the organism for which stocks types set above will be synced. Only organisms which also have been synced will appear in this list."),
  61. '#options' => $organisms,
  62. );
  63. $form['button'] = array(
  64. '#type' => 'submit',
  65. '#value' => t('Sync all Stocks'),
  66. '#weight' => 3,
  67. );
  68. return $form;
  69. }
  70. /**
  71. *
  72. */
  73. function tripal_stock_sync_form_validate($form, &$form_state) {
  74. $organism_id = $form_state['values']['organism_id'];
  75. $stock_types = $form_state['values']['stock_types'];
  76. // nothing to do
  77. }
  78. /**
  79. *
  80. */
  81. function tripal_stock_sync_form_submit($form, &$form_state) {
  82. global $user;
  83. $organism_id = $form_state['values']['organism_id'];
  84. $stock_types = $form_state['values']['stock_types'];
  85. $job_args = array(0, $organism_id, $stock_types);
  86. if ($organism_id) {
  87. $organism = tripal_core_chado_select('organism', array('genus', 'species'), array('organism_id' => $organism_id));
  88. $title = "Sync stocks for " . $organism[0]->genus . " " . $organism[0]->species;
  89. }
  90. else {
  91. $title = 'Sync stocks';
  92. }
  93. variable_set('chado_sync_stock_types', $stock_types);
  94. tripal_add_job($title, 'tripal_stock', 'tripal_stock_sync_stocks', $job_args, $user->uid);
  95. }
  96. /**
  97. *
  98. */
  99. function tripal_stock_set_urls($job_id = NULL) {
  100. // get the number of records we need to set URLs for
  101. $csql = "SELECT count(*) FROM {chado_stock}";
  102. $num_nodes = db_result(db_query($csql));
  103. print "$job_id\n";
  104. // calculate the interval at which we will print an update on the screen
  105. $num_set = 0;
  106. $num_per_interval = 10000;
  107. // prepate the statements which will quickly add url alias
  108. $psql = "
  109. PREPARE del_url_alias_by_src (text) AS
  110. DELETE FROM {url_alias} WHERE src = \$1
  111. ";
  112. db_query($psql);
  113. $psql = "
  114. PREPARE ins_url_alias_nisrds (text, text) AS
  115. INSERT INTO url_alias (src, dst) VALUES (\$1, \$2)
  116. ";
  117. db_query($psql);
  118. // get the list of stocks that have been synced
  119. $sql = "SELECT * FROM {chado_stock}";
  120. $nodes = db_query($sql);
  121. while ($node = db_fetch_object($nodes)) {
  122. // remove any previous alias
  123. $src = "node/$node->nid";
  124. $dst = tripal_stock_get_stock_url($node);
  125. $success = db_query("EXECUTE del_url_alias_by_src('%s')", $src);
  126. $success = db_query("EXECUTE ins_url_alias_nisrds('%s', '%s')", $src, $dst);
  127. // update the job status every 1% stocks
  128. if ($job and $num_nodes % $num_per_interval == 0) {
  129. $percent = ($num_set / $num_nodes) * 100;
  130. tripal_job_set_progress($job, intval($percent));
  131. $percent = sprintf("%.2f", $percent);
  132. print "Setting URLs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  133. }
  134. $num_set++;
  135. }
  136. $percent = ($num_set / $num_nodes) * 100;
  137. tripal_job_set_progress($job, intval($percent));
  138. $percent = sprintf("%.2f", $percent);
  139. print "Setting URLs (" . $percent . "%). Memory: " . number_format(memory_get_usage()) . " bytes.\r";
  140. print "\nDone. Set " . number_format($num_set) . " URLs\n";
  141. }
  142. /**
  143. *
  144. */
  145. function tripal_stock_get_stock_url($node) {
  146. // get the starting URL alias
  147. $url_alias = variable_get('chado_stock_url_string', '/stock/[genus]/[species]/[type]/[uniquename]');
  148. if (!$url_alias) {
  149. $url_alias = '/stock/[genus]/[species]/[type]/[uniquename]';
  150. }
  151. $url_alias = preg_replace('/^\//', '', $url_alias); // remove any preceeding forward slash
  152. // get the stock
  153. $values = array('stock_id' => $node->stock_id);
  154. $stock = tripal_core_chado_select('stock', array('*'), $values);
  155. $stock = (object) $stock[0];
  156. // get the organism
  157. $values = array('organism_id' => $stock->organism_id);
  158. $organism = tripal_core_chado_select('organism', array('*'), $values);
  159. $genus = preg_replace('/\s/', '_', strtolower($organism[0]->genus));
  160. $species = preg_replace('/\s/', '_', strtolower($organism[0]->species));
  161. // get the type
  162. $values = array('cvterm_id' => $stock->type_id);
  163. $cvterm = tripal_core_chado_select('cvterm', array('name'), $values);
  164. $type = preg_replace('/\s/', '_', $cvterm[0]->name);
  165. // now substitute in the values
  166. $url_alias = preg_replace('/\[id\]/', $stock->stock_id, $url_alias);
  167. $url_alias = preg_replace('/\[genus\]/', $genus, $url_alias);
  168. $url_alias = preg_replace('/\[species\]/', $species, $url_alias);
  169. $url_alias = preg_replace('/\[type\]/', $type, $url_alias);
  170. $url_alias = preg_replace('/\[name\]/', $stock->name, $url_alias);
  171. $url_alias = preg_replace('/\[uniquename\]/', $stock->uniquename, $url_alias);
  172. return $url_alias;
  173. }
  174. /**
  175. *
  176. *
  177. * @ingroup tripal_stock
  178. */
  179. function tripal_stock_sync_stocks($max_sync = 0, $organism_id = NULL,
  180. $stock_types = NULL, $job_id = NULL) {
  181. //print "Syncing stocks (max of $max_sync)\n";
  182. $i = 0;
  183. // get the list of available sequence ontology terms for which
  184. // we will build drupal pages from stocks in chado. If a stock
  185. // is not one of the specified typse we won't build a node for it.
  186. if (!$stock_types) {
  187. $allowed_types = variable_get('chado_sync_stock_types', '');
  188. }
  189. else {
  190. $allowed_types = $stock_types;
  191. }
  192. if ($allowed_types) {
  193. $allowed_types = preg_replace("/[\s\n\r]+/", " ", $allowed_types);
  194. print "Looking for stocks of type: $allowed_types\n";
  195. $so_terms = split(' ', $allowed_types);
  196. $where_cvt = "";
  197. foreach ($so_terms as $term) {
  198. $where_cvt .= "CVT.name = '$term' OR ";
  199. }
  200. $where_cvt = drupal_substr($where_cvt, 0, drupal_strlen($where_cvt)-3); # strip trailing 'OR'
  201. }
  202. else {
  203. $where_cvt = '1=1';
  204. }
  205. // get the list of organisms that are synced and only include stocks from
  206. // those organisms
  207. $orgs = tripal_organism_get_synced();
  208. $where_org = "";
  209. foreach ($orgs as $org) {
  210. if ($organism_id) {
  211. if ($org->organism_id and $org->organism_id == $organism_id) {
  212. $where_org .= "S.organism_id = $org->organism_id OR ";
  213. }
  214. }
  215. else {
  216. if ($org->organism_id) {
  217. $where_org .= "S.organism_id = $org->organism_id OR ";
  218. }
  219. }
  220. }
  221. $where_org = drupal_substr($where_org, 0, drupal_strlen($where_org)-3); # strip trailing 'OR'
  222. // use this SQL statement to get the stocks that we're going to upload
  223. $sql = "SELECT stock_id ".
  224. "FROM {stock} S ".
  225. " INNER JOIN {cvterm} CVT ON S.type_id = CVT.cvterm_id ".
  226. "WHERE ($where_cvt) AND ($where_org) ".
  227. "ORDER BY stock_id";
  228. // get the list of stocks
  229. $results = chado_query($sql);
  230. // load into ids array
  231. $count = 0;
  232. $ids = array();
  233. while ($id = db_fetch_object($results)) {
  234. $ids[$count] = $id->stock_id;
  235. $count++;
  236. }
  237. // make sure our vocabularies are set before proceeding
  238. // tripal_stock_set_vocabulary();
  239. // pre-create the SQL statement that will be used to check
  240. // if a stock has already been synced. We skip stocks
  241. // that have been synced
  242. $sql = "SELECT * FROM {chado_stock} WHERE stock_id = %d";
  243. // Iterate through stocks that need to be synced
  244. $interval = intval($count * 0.01);
  245. if ($interval < 1) {
  246. $interval = 1;
  247. }
  248. $num_ids = sizeof($ids);
  249. $i = 0;
  250. foreach ($ids as $stock_id) {
  251. // update the job status every 1% stocks
  252. if ($job_id and $i % $interval == 0) {
  253. tripal_job_set_progress($job_id, intval(($i/$count)*100));
  254. }
  255. // if we have a maximum number to sync then stop when we get there
  256. // if not then just continue on
  257. if ($max_sync and $i == $max_sync) {
  258. return '';
  259. }
  260. if (!db_fetch_object(db_query($sql, $stock_id))) {
  261. # parsing all the stocks can cause memory overruns
  262. # we are not sure why PHP does not clean up the memory as it goes
  263. # to avoid this problem we will call this script through an
  264. # independent system call
  265. print ($i + 1) . " of $num_ids Syncing stock id: $stock_id\n";
  266. $cmd = "php " . drupal_get_path('module', 'tripal_stock') . "/includes/tripal_stock.sync_stocks.inc -f $stock_id -t chado_stock";
  267. print "$cmd\n";
  268. system($cmd);
  269. }
  270. $i++;
  271. }
  272. return '';
  273. }
  274. /**
  275. *
  276. *
  277. * @ingroup tripal_stock
  278. */
  279. function tripal_stock_sync_stock($stock_id) {
  280. print "\tSyncing stock $stock_id\n";
  281. global $user;
  282. $create_node = 1; // set to 0 if the node exists and we just sync and not create
  283. // get the accession prefix
  284. $aprefix = variable_get('chado_stock_accession_prefix', 'SID');
  285. // if we don't have a stock_id then return
  286. if (!$stock_id) {
  287. drupal_set_message(t("Please provide a stock_id to sync"));
  288. return '';
  289. }
  290. // get information about this stock
  291. $fsql = "SELECT S.*, O.genus, O.species,CVT.name as cvname ".
  292. "FROM {stock} S ".
  293. " INNER JOIN {cvterm} CVT ON S.type_id = CVT.cvterm_id ".
  294. " INNER JOIN {organism} O ON S.organism_id = O.organism_ID ".
  295. "WHERE S.stock_id = %d";
  296. $stock = db_fetch_object(chado_query($fsql, $stock_id));
  297. /*
  298. // get the synonyms for this stock
  299. $synsql = "SELECT S.name ".
  300. "FROM {stock_synonym} SS ".
  301. " INNER JOIN {synonym} S on SS.synonym_id = S.synonym_id ".
  302. "WHERE SS.stock_id = %d";
  303. $synonyms = chado_query($synsql, $stock_id);
  304. // now add these synonyms to the stock object as a single string
  305. $synstring = '';
  306. while ($synonym = db_fetch_object($synonyms)) {
  307. $synstring .= "$synonym->name\n";
  308. }
  309. $stock->synonyms = $synstring;
  310. */
  311. // check to make sure that we don't have any nodes with this stock name as a title
  312. // but without a corresponding entry in the chado_stock table if so then we want to
  313. // clean up that node. (If a node is found we don't know if it belongs to our stock or
  314. // not since stocks can have the same name/title.)
  315. $tsql = "SELECT * FROM {node} N ".
  316. "WHERE title = '%s'";
  317. $cnsql = "SELECT * FROM {chado_stock} ".
  318. "WHERE nid = %d";
  319. $nodes = db_query($tsql, $stock->name);
  320. // cycle through all nodes that may have this title
  321. while ($node = db_fetch_object($nodes)) {
  322. $stock_nid = db_fetch_object(db_query($cnsql, $node->nid));
  323. if (!$stock_nid) {
  324. drupal_set_message(t("%stock_id: A node is present but the chado_stock entry is missing... correcting", array('%stock_id' => $stock_id)));
  325. node_delete($node->nid);
  326. }
  327. }
  328. // check if this stock already exists in the chado_stock table.
  329. // if we have a chado stock, we want to check to see if we have a node
  330. $cfsql = "SELECT * FROM {chado_stock} ".
  331. "WHERE stock_id = %d";
  332. $nsql = "SELECT * FROM {node} N ".
  333. "WHERE nid = %d";
  334. $chado_stock = db_fetch_object(db_query($cfsql, $stock->stock_id));
  335. if ($chado_stock) {
  336. drupal_set_message(t("%stock_id: A chado_stock entry exists", array('%stock_id' => $stock_id)));
  337. $node = db_fetch_object(db_query($nsql, $chado_stock->nid));
  338. if (!$node) {
  339. // if we have a chado_stock but not a node then we have a problem and
  340. // need to cleanup
  341. drupal_set_message(t("%stock_id: The node is missing, but has a chado_stock entry... correcting", array('%stock_id' => $stock_id)));
  342. $df_sql = "DELETE FROM {chado_stock} WHERE stock_id = %d";
  343. db_query($df_sql, $stock_id);
  344. }
  345. else {
  346. drupal_set_message(t("%stock_id: A corresponding node exists", array('%stock_id' => $stock_id)));
  347. $create_node = 0;
  348. }
  349. }
  350. // if we've encountered an error then just return.
  351. if ($error_msg = db_error()) {
  352. //print "$error_msg\n";
  353. return '';
  354. }
  355. // if a drupal node does not exist for this stock then we want to
  356. // create one. Note that the node_save call in this block
  357. // will call the hook_submit function which
  358. if ($create_node) {
  359. // get the organism for this stock
  360. $sql = "SELECT * FROM {organism} WHERE organism_id = %d";
  361. $organism = db_fetch_object(chado_query($sql, $stock->organism_id));
  362. drupal_set_message(t("%stock_id: Creating node $stock->name", array('%stock_id' => $stock_id)));
  363. $new_node = new stdClass();
  364. $new_node->type = 'chado_stock';
  365. $new_node->uid = $user->uid;
  366. $new_node->title = "$stock->name, $stock->uniquename ($stock->cvname) $organism->genus $organism->species";
  367. $new_node->sname = "$stock->name";
  368. $new_node->uniquename = "$stock->uniquename";
  369. $new_node->type_id = $stock->type_id;
  370. $new_node->organism_id = $stock->organism_id;
  371. $new_node->stock_id = $stock->stock_id;
  372. $new_node->chado_stock_exists = TRUE;
  373. // validate the node and if okay then submit
  374. node_validate($new_node);
  375. if ($errors = form_get_errors()) {
  376. print "Error encountered validating new node. Cannot sync: $msg\n";
  377. foreach ($errors as $key => $msg) {
  378. watchdog('trp-fsync', "%msg", array('%msg' => $msg), 'error');
  379. }
  380. exit;
  381. }
  382. else {
  383. $node = node_submit($new_node);
  384. node_save($node);
  385. }
  386. }
  387. else {
  388. $node = $chado_stock;
  389. }
  390. return '';
  391. }